[ { "model":"amazon\/nova-micro-v1", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.1920266076 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4391810036 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1921503062 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4121866696 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0706883182 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.2475181808 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.1707669328 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1135444374 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.294888625 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0732341445 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.1712917218 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.1518475015 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4409012481 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2466291513 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4728505876 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2294045445 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.4952691669 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3288016428 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5463705477 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0629352571 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.3681064162 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.0907131082 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.345867578 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.0515032587 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.3472277664 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1851832776 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4094860171 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.178673865 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.421571814 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0893532438 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.322658401 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2155508388 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.4627157806 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2362014825 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4311272979 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1108973081 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.3531592039 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1247431931 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3827364935 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.0852543641 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.3968535114 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.1568859483 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.3854296145 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1672670776 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4344446004 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1673485041 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3498742372 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2027365964 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.4429221375 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.2592014549 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.4502390132 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.245009056 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.4752285329 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.1249288306 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.4378159282 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.0418701115 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.227562406 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0126447021 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.2329401033 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.1929725876 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.4742897968 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2142339765 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.4337679078 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.2486422994 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5046441311 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.390442231 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6258445826 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2411522305 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4648713205 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.2526136998 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.4761366058 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.4829593782 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6670785718 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5654501085 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7810071072 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2334919219 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.488808171 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3534859166 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5837022928 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.1673580411 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.4256978921 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.121266165 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3348454203 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.2526168511 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5000405768 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.1859093987 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5327770983 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2616894756 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5082668021 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4039820936 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.5963334297 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0342743018 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2106722499 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0168251159 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2071693978 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2225633848 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.4680658654 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1649991958 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4328117039 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1049906446 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.3579727145 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.105968367 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.3884447474 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3412962275 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5789604921 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3866780476 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.60404499 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1740026405 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4261065659 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0865662826 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3708353537 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2110505791 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4739587631 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.258885113 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.4885220189 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.1741677254 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.4469342589 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.1716570673 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.5608938423 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1185701002 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.3493767594 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0477092787 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.2820590617 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1533886213 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.3781904602 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.0407683795 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.3408267624 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2125826846 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4735941044 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2872908558 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5675711887 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2197709837 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4784848367 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3426239655 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4822747548 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2192585886 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.4527368673 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.1076519805 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.4493011434 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1468838458 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4417745561 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.1037111406 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.3845090606 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.1705614622 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.4283589307 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0591946627 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.2932951073 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1691888664 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4411352116 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1416550774 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.3528205749 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.144249324 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4191931285 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2803945229 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3626852212 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0633143836 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2651687739 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.020688049 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2307402586 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2269784465 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.4948809346 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2820214504 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5208833255 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.1601088672 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.4390231849 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1901773558 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4143082353 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.1257791635 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.3415200548 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0274461195 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.3265811196 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2611524911 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.4964180281 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1563741006 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4042462159 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1387332093 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.4231622054 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.1326285083 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.3320956129 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.2497447823 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5162841499 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3139021174 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6108991322 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.235010453 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4387602841 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0992435005 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.3897491958 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.216754292 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.4725649931 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1116933816 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.3950140706 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2226934995 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4632319399 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2791201051 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5528589826 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0909296921 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2970179383 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.02807499 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.219484121 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0253971349 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1941399108 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0169773321 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.1905807428 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1621367612 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.4038308668 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0908092738 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.3143126503 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3548435009 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6046727327 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3341282505 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.4931240563 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1867871917 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.4539322586 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2742131221 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5129096175 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2534618489 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.4757478619 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4150555406 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.632650236 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.1876189148 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.454947207 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.3342150948 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.57049006 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.173300895 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4267275321 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.2934481188 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5432096638 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0583944161 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.3018524463 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0428604601 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.2723962004 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.1924194773 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.4198584709 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0594147664 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.2791093079 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.1888886982 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4271659434 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0950029931 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.2956403655 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0476351796 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.2242691735 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0203405417 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.2850725298 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0944388832 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.306261789 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.019432628 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.2483400713 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2062990967 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4818899065 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.2783417409 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.4975137727 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1360125442 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3674257568 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0850175921 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.3898736967 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2441010155 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.4832568053 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.2973906807 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.5569302178 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.203432711 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.4661592161 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.260138004 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.5610336232 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.1686868634 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4170694333 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.1076094956 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.4166151764 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3031235597 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.537458124 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3112392832 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5195600347 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1315355302 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.3725651966 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.0631259366 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.3114796779 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.1544266067 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4093583062 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2921490956 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4738053288 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0376554837 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.220689676 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0245374067 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.1093782195 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.1911760417 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.4884230434 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.2744588637 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5473239267 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2342560425 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4694733574 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3127145891 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5389265653 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0368191769 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1231521557 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0009825493 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.0937946455 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2338522454 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4761317098 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.1016530484 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.3475324071 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.1532167967 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4049784493 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1825939096 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4435726767 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.122224664 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.3790772862 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.1919159066 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.4399907204 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0652970567 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2548173577 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.006043152 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1774675407 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.1258893867 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.3740568466 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1033449919 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1661073339 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1178497705 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.3273836873 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0233770563 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.2536020887 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0573424824 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2409922496 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0316163135 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.1841004492 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1573790422 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.438817852 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1346642263 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2288901399 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1734447333 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4737881775 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2855931625 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3243323503 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1225580295 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.3092331916 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0262808074 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.2709457919 }, { "model":"amazon\/nova-micro-v1", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2525776792 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.486411661 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1689851292 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4129981246 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.2332134247 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.423589823 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.1556181424 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.3940970742 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.3019856242 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.5356092631 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2493197472 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3683119816 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.3299261084 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5833446367 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2557354135 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4932559294 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2978564835 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5565909038 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.432503176 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.6128170494 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.2313315741 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4980876242 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1602739474 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4283745115 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2208466475 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4480506021 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2446811676 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4668847579 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2365169024 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4798564418 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1836576399 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.4135133991 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3426000326 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5515631826 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1661968791 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.36455729 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.2545049288 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4810751728 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1944100883 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4665838917 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.2285219894 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.494917535 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2797028839 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4981195393 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2674142888 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5117383385 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2226695839 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4175069959 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.3456828806 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5745055104 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3168780466 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5146817021 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.4102088719 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5919699107 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3905880035 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6171544436 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.2819272083 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.5597768822 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.2855875443 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.5213025666 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3459871993 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.6001384205 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.4616988853 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.6321620897 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3819883103 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.6006708192 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.487017155 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6782242157 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3137128529 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.547364909 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3942999119 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5751984516 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5715619385 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.715520007 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.7318691008 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8563332446 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3643234323 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5843649416 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3881940897 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6134636944 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3166464353 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5644297126 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.255024921 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4730105151 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3710429385 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.6023461066 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3249072169 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.6015751183 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.376865653 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.6004276916 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.528425696 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7187146132 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0388587129 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2391023347 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0240769455 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.243334857 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2822570677 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5115986572 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1968109087 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4530952838 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.2122035826 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4411039967 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2092794988 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4991915153 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.4228838058 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.640023433 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.4313107714 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6419340536 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2742764457 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.5234337918 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1283588392 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3942937124 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.3239027756 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5801190353 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3518811457 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5955885461 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3600825798 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.6054355131 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3725744078 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6643987333 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.2270079348 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4461810563 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1880347324 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4366454082 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.2896828865 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.5142198212 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.2227918044 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.5195207754 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3487353423 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5751528871 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.4005498625 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.6354726766 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2982872033 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5388133219 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3041789231 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4600408983 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3464358022 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5558199575 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2492331002 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5673549318 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.243733181 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.5336823494 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.3319552288 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5710759927 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3526008915 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5987314155 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.2114985992 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.4380696418 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.3225451009 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.5503588307 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.3450200815 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5340229728 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2513328863 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.509018423 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.3099962758 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3679934022 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.133543561 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.3430238701 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0554389677 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.3402266285 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3820373565 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5998907111 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2728061501 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4840324931 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.295109772 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5448597381 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.247074884 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4883086081 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.3323748277 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.5590337603 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1910199162 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5212112142 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3430335831 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5746721035 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.3481170694 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5673214411 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.321620887 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.554989685 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2947526867 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4967353717 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3837477301 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.6078820797 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.460911653 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.7361702362 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.3243318767 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.5325074594 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.29655196 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.5575622672 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3727062795 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.6078730814 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2761405776 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.5172440312 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.3073830454 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5312334894 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3738440243 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6210548081 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.2080935284 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.4613292908 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1142069107 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4684888109 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.1248390462 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3912118415 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0733481499 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.4043636025 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.3088186789 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5586166118 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.2551093032 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.471197857 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4788356583 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6806459378 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4407410774 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.6057762292 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2858167001 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5489970472 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3573911021 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5811687089 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3641053048 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.588189418 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.5753469236 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.7440224371 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.3322929823 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5688847284 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5519246878 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.7223799311 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2548042194 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5077624586 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.4489469101 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6629307467 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.241720034 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.490197442 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1941190598 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5032987767 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3417012568 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5625174346 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.3132737681 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4785817971 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2661551891 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5088142958 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2657051864 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.468771605 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1950454148 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.4066164793 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0895987522 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.406366105 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2277966149 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4790482859 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1890846456 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4927220926 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.3283448359 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5971969841 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4709197385 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6367420245 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.273412678 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.5129343997 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1877009474 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5103267256 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3590806785 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5793899495 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4522863769 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.69205378 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.3219627814 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5598942303 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.4361318725 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6665904527 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2472520967 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5258675516 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.3092184178 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5908056148 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.4085013861 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.624313704 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3558604021 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5800045033 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.2686417951 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.5013057378 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.269846858 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.477780812 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2834577064 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5125776742 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3998428237 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5544961029 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.2654309389 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.4824817611 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.1862233406 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.2911678276 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.3108788704 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5602956663 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3518016236 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5965516262 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2801238065 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5303841848 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.4297505232 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.6084569418 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.05869634 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.2062284007 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0559741426 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.2728382878 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2899278495 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5575393299 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2752599873 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.476774558 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2826575967 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.5327013244 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.3655163534 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.6164677172 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.3239526293 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5501317095 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.395432451 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6050844519 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.1463159063 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.3734663519 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0899876038 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.3152591585 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2446481916 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4981230837 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1046640677 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1573468803 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.2368457175 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.4726833185 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0745513103 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4280480618 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.1823100131 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.4063332022 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0722898622 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2293754958 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2476485874 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4997416793 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.2660169184 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2991680484 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.27341846 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.541063718 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.3233289278 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3475215495 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.3008140825 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.5503351929 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.3025859718 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5616917702 }, { "model":"anthropic\/claude-3.5-sonnet", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2714908932 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4981839139 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2421707351 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4638593828 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1552497705 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.3876789912 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.12404113 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.387786267 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.2654957101 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.5223894972 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2492184328 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3844495283 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.3358787565 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5936351207 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.3077302936 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.5299097797 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2889865542 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5430128204 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.4267246672 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.6048255191 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.2209069897 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4983774573 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.2238610606 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4561674954 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2218813556 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4561380984 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2970501456 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.5026533348 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2349948321 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4767507943 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1835124052 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3948207636 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3743066609 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5599845365 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.197638086 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4275815242 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.2511263756 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4884204513 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.185215113 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4698145601 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1867886283 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.48432494 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2914923356 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4904369651 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2947535379 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5165639924 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2396652186 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.43949233 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.3082568982 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5337051323 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3355598159 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5162264918 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.4122857574 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.6050858924 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3836458269 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6416210642 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.2735961462 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.5476293362 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.3182484892 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.5528408781 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3514942306 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.6058081868 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.4019223656 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.592911966 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3635164411 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5991540113 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.5032720779 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.693325521 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2964481503 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5313927058 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3878661569 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5730182703 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.6109509541 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.7441302539 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.7189431005 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.858877842 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3415212877 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.581216976 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3866651561 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6216604607 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2934935537 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5521598139 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2612039966 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4687393359 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3757506556 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5844308819 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3534386963 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.6302138792 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3472418737 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5869792648 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.55907943 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7254469966 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0579558321 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2491854022 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0644952316 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.27092494 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.3154133447 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5290294486 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.2126420937 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4630333055 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1639811937 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.3928914747 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.1953419416 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.478395209 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.4173637317 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6215184775 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.4359736097 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6378773265 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2576586101 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4955502153 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1623467534 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.4282927035 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.3311462156 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5917802475 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3618326454 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5905610326 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.30984493 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5816152273 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.4256755459 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6937537754 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.2392256846 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.50697791 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2113990452 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4634979196 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.2765007451 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4939949219 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.2531559761 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.5360266274 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3367683936 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5620478968 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3820994257 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.617255004 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.3020669513 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5652474506 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2526786297 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4189814818 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3764722943 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.596660778 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2446930524 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5495728981 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.226882832 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.5274827881 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2928073284 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5503591158 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3359656431 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5832978038 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.2052531723 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.423812318 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2624484733 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.5071140689 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.3198638529 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5401137308 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2455263458 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.5060005291 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.26830985 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3779300192 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0877409274 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2860240482 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0878946168 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.3792924127 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3575309079 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5809284684 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.3412387019 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5581098509 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2941552983 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5344011771 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.2473518914 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.5038938769 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2928160974 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.531553217 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.2381371552 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5343730926 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3831267967 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5877718214 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.3166439754 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5664991748 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3248935831 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.556535727 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2773274773 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.5134253387 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3902392905 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.6255884221 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.4320552013 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.7162987249 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2973625302 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.5304507919 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2814844596 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.5325214597 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3423342516 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5714046498 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.3073801688 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.535796278 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2915844659 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5233082306 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3496367393 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6133721509 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.1393773943 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.3742917816 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1302935996 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4828092948 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.1183565284 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3604387918 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0926735247 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.407269173 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.3454626797 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5668266666 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.2472009189 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.455460052 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4397356804 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6588393086 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4762161169 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.6165646404 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2837156349 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5580363271 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3506539621 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5826652331 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3702288509 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.603198011 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.5348426193 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.7292524118 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.3025325263 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5428889158 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5423330128 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.7003733903 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.254709734 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5378272729 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.4143683284 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6253967915 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1962212986 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.4367493461 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1929905791 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5044002449 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.2918138373 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5417251668 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.3037847598 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4992460758 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2617759894 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5276633986 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2777846992 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4645319126 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1183361048 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3272094202 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.079124328 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.3902046622 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2117678963 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4359443689 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1732885789 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4964169161 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.3255971416 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.6008485267 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4485218422 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6288500197 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2558609607 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4641225577 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2212772916 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5309758013 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3158889064 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5748466359 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4289796381 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6719817133 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2686859348 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5049847235 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.3662710201 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6470689802 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.276310564 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5265133216 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.3356818462 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5995810459 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.4154607989 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.6309805607 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3267961489 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5560375728 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.2492030159 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4785567235 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.2416391156 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4777363194 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2828625213 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5389976055 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.379459074 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5425233372 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.2043216296 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.4630588295 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.170099018 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.285966574 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2999115931 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5465058684 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3389488489 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.6048347469 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.281242043 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5334289579 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.429141026 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5970789076 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0335871262 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1685656521 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0459514881 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.2949770962 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2486274521 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.508189485 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2880255112 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4836035417 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2152838338 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4431377684 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.3439565173 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.614390118 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.3184530249 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5591378012 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3979752911 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6309674882 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0582735609 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.3023435605 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.078527211 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.3202313873 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2575010745 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.5010134287 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1433567699 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1989285088 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1807854017 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.4444459462 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.097668655 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4425301092 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.138613234 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.3633380586 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.1255527769 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2943453041 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2202422573 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4809857806 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.2522298384 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2981672562 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2355051896 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5276826547 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.3416160138 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.4052096434 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.3330382198 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.5839627022 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.3449975051 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5825497566 }, { "model":"anthropic\/claude-3.7-sonnet", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2669835517 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.5240335993 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2270338391 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.440172001 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1929300991 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.4140331595 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.1217632337 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.4239570091 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.2668020262 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.5382720996 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2942451115 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3948537197 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.3183719205 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5861114184 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.3080791098 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.5400496227 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.3007148198 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.56125031 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.4435216687 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.6147226174 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1875598171 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.489336688 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1959881242 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4184181558 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.226812714 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4624634504 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2455579306 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4672104169 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.225211407 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.475337495 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0987347036 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3710433705 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3664463476 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5637794084 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2102096564 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4016844833 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.2623718084 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.503088988 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1154914703 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3988570155 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1615117356 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4748907807 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2424817869 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4495742511 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2787657292 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5218950872 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2026807977 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4168764189 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.3311551104 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5619474693 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3738906354 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5465489645 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.4093718231 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.624497016 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3928115555 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6357826484 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.2618384456 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.5051359715 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.2370701457 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.5093826491 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3614997929 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.6075438302 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.4250759164 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.6148435167 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3322365647 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5893045134 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.5389893408 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.7126873721 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3388316195 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.559085934 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3169438238 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.4981307307 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.4825264923 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.68082594 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5992232007 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8046234958 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3430620741 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5743349585 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3926109516 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.619328646 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3210865047 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5628291341 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2322885818 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4668783462 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.4168839668 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.6386977285 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3546234926 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.6240074261 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3292803051 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5747992621 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5300268114 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7191025853 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.10328558 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2982675123 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0624940624 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2487717813 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.3449460589 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.553592379 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1978537012 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4545082837 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.2192965513 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4642021162 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2765720113 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5462639917 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.4144634414 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6214654965 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.4534129099 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6589590592 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2772612689 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.5374508975 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1221025047 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3885482618 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.3393506808 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.6050917672 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3513113523 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.6039205342 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3412154588 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5991114384 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.4209501123 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6893145815 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.2551553778 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.5138522649 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2850564276 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.5207891639 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.2687344116 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.5272540777 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.2521419676 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.5246932394 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3022388162 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5435384668 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3764287035 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.6223142999 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2645654805 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5314828839 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3107372447 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4566077399 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3598923473 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5731300576 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.3220023978 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5987896066 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2249890784 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.5034200882 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.3391843994 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5707828412 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3651900786 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.6151464618 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1894654695 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.4093754295 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2918136532 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.5456800793 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2975642517 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5201018846 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.262347554 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.5086539396 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.324863236 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3928185911 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0865732382 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.3303494458 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0819614343 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.30926584 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3814378567 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5990132828 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2914418132 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5356815834 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.3426987765 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5951472349 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.2551863067 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.5195593791 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2538113904 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.5210775783 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.2197400214 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5546741997 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3912899265 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.6091996357 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.3473927547 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5540996255 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3219708707 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5658325036 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.267663768 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4820888027 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3813299088 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.6248613325 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.5190281346 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.7387347937 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2217546445 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4614867807 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2018629647 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.42603146 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3638956194 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.6069685866 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2818004563 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.520988987 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2888744227 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5305153826 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3314709774 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5984996024 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.2128497133 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.4631762575 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.170528296 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.5195265013 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0865227031 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3281312148 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.056144723 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3812309298 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.3285408012 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5645168724 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.2820925848 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.5101934539 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.2859396371 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.5523733153 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3163285848 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5051708575 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2442291975 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5262128573 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2978567509 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5693090483 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3458915528 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5893254106 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4849210354 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6981449573 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.317731832 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5613754153 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4713164151 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6495822688 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2503325982 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5416976917 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3948676748 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6159422103 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.2295101341 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.4799966973 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.2510859963 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5565619536 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3515638071 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5824484364 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.3241789047 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4829492302 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2746497811 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5372002467 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.3076981818 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4873474492 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1299381077 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3788726193 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1912277179 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4950017684 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2097508574 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.445386701 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1991170213 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4930051732 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2774555913 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5562717021 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4106937329 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6058388421 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2967826709 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.5262571298 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2209836503 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5082942096 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3011367305 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.564663941 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.448345108 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6666956471 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.3244718268 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5658165542 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.407475727 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6425140836 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2859927313 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.543203281 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.3180064929 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5878963723 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.457193264 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.6605869611 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3695252842 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5799625426 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.2009085121 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4600287687 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.2198024006 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4640276677 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2907627934 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5558044872 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3933820255 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5410635816 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.1783537399 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.4347699538 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.1382203867 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.2283013271 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.3344661609 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5823607578 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3519575693 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5873812009 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.231149484 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5147846224 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.4089741506 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.586691795 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0407386888 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1524708774 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0615467888 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.2411879984 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2976308897 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5789469354 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.3161395969 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4958567702 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2607779047 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.5026594084 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.3155204999 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5892994562 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2266398655 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.4844747718 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3284536924 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5738443768 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0897090973 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.3056008006 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.1036046651 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.3287187985 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2179300592 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.5011175701 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1863553829 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.2320124968 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.2455042454 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.4980044603 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.1581907622 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4829223036 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.109344665 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.3572196014 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0982287816 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2450420475 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2396091802 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.5123401487 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.2229490278 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2797129431 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2637315312 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.550920529 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.3126940794 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3809708703 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.2898457226 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.563083348 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.2463331565 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5526475142 }, { "model":"anthropic\/claude-sonnet-4", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2057963854 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4190801172 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1996247285 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4442043679 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1217071927 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.328265152 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0291184089 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.3034981634 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.2026417516 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.463337874 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.1217867685 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.2662662886 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2523784543 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5195328715 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2422451252 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.5264317244 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2660357996 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5489237487 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3431691167 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5659025601 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1411356359 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4480483081 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1518080432 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3858829748 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.221843468 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4559655934 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1965127641 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4448745325 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2397069931 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.475288864 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1580418587 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3840415666 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3229660121 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5510776215 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1731926352 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3746553107 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1557419708 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4249908572 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1551519146 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4046874828 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1827705659 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4758300316 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2887000518 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4689651175 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2524146198 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4953606649 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1826604742 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3887388562 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2999810338 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5315905896 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3568113924 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5379558638 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3821149754 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5961975536 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.346540273 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6141374461 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.1899525093 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4989816408 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.1315130933 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.4341433104 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3339653303 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5865996685 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.4005566788 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5928360984 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3410327538 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5702449417 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4621824412 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6644677733 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3296804956 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5432353476 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3361581186 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.528779004 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5530727537 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.7075859327 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6368037655 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8170495194 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3558328881 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5816613686 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3018683161 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5862734644 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2520796337 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5331446204 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1921744404 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.407787256 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3243217549 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5392052945 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3295144102 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5926958618 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3482763208 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5982327963 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.452269216 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6590312745 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.038115978 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2335371377 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0230267925 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2225585574 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2919455567 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5131981959 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1292315656 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4092770954 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1854918728 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.3934600154 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.266974078 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5393383261 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3747394322 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6079633657 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.4420465241 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6372857982 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2331017118 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4736195736 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1022265448 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3883925189 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2769514475 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5440818488 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3439976656 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.583941298 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3120299438 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5421647326 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3139800299 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6305869448 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1948861013 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4359300942 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2500136994 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4929182362 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1414246135 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.400392952 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1754521219 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4706209345 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2726271593 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5135668055 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3420319137 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.6095914494 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2121435327 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5165098934 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3965805608 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.5197322727 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2594796679 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.4780086047 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.3009966401 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5761529867 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2316697911 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4912151365 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2880260216 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5039209227 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.2667770035 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5371226098 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1137141251 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3542419226 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2597105982 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4929733942 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1901313405 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4793446685 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2650641815 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.5151986111 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2220814456 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.318063422 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0836411722 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2984177736 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.043655724 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2335929953 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3229689104 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5803873354 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2069505913 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.489819774 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2481653983 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5271356217 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.182427997 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4654775647 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.1686533634 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.3897581367 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1950122982 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.4912322205 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3261687004 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5570892069 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.3239362726 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5507453743 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2536168105 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5193737639 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2743904396 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4918283752 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.2986347511 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5339411724 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3783664491 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6540376697 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2199531275 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4736214414 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2571304866 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.5221658577 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3141227729 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.545378522 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2384329055 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4946334042 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2960130626 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5398408986 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2642890319 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5934736192 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.1039040494 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.343251081 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0893012181 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.42437101 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0512543236 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2613232237 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0260385015 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3274101513 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.271766815 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5041819226 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1870998997 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4329741844 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4307107958 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6547934264 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3789232965 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5490387026 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2715804037 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5307525116 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3285527346 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5692291394 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3491027097 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5771246685 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3737440632 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6547073078 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2650680958 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5205343441 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4511284085 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6282365697 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2584319121 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5177022275 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.371218137 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5948906549 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0926162696 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.3328045162 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1301545031 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.4379044144 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.2619487807 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.4937679545 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2097242522 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4402768379 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2206007581 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4841625224 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2418583032 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4464397214 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0858734443 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3340485034 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1646186552 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4670539114 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2147068836 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4416005881 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1631062755 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4541385931 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2705284479 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5472822854 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.3949384152 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5962190033 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.247144381 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.472335293 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2147391336 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5213195361 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3112375203 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5600711232 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3915983131 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6470079791 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2583542451 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5157482031 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.3458513734 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.634706105 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2339300786 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4691042738 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2567323645 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5361849509 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3887890595 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.6100201392 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3146346509 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5476489231 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1544230564 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4169441821 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.203628947 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4354012087 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2554271374 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5059580785 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3635410685 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5132811401 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.1818206483 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.3817278149 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.1031390295 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.2139568479 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.3173811815 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5742755278 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3658586977 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5886644893 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2739927286 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5313085407 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3396470191 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.56896944 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0391048872 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1804054377 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0422939527 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.2015864716 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2324983634 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.497221173 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2976387517 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4761547661 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2305488159 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4723700911 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2058479152 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4902380763 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.3027505857 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5506378818 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3708866541 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5846851624 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0772031909 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.238183844 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0504923015 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.3006315368 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2388037754 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.5072972409 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1136417481 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1766903691 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1425657247 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.3799312791 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0602102371 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4158042285 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0751924362 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.27475056 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0837679098 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2484365945 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.143983328 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.3955437811 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.2002778421 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2907084137 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2377253988 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5405656214 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2660263348 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3459946232 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1095698729 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.379524938 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.2569801761 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5416847655 }, { "model":"deepseek\/deepseek-chat", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2340706769 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4849021224 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2324871288 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.463163379 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1145237929 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.321984884 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0457100188 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.345736773 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.2016755199 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.4502815524 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.1687061726 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.2849440478 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2569252635 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5329928091 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2423805131 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.5144112629 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2993048546 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5651988199 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3853695566 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.602672086 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1414242697 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4491969143 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1969875411 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4112680984 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1819096557 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4374822654 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2316387528 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4676592617 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2150708922 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4723591307 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1405266408 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3821492664 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3061008878 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5403845189 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1651572659 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3834555839 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1950569484 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4560500844 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1448904562 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4007813245 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1568183376 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4733150063 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2952652338 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4756055948 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2098690628 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4737398201 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1892048942 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3849575805 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2798537803 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5394779979 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3719179468 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.539682577 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.40271244 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.6036828752 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3295443052 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.592814404 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.1908998381 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4722273522 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.1077730104 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.4189694789 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3296294187 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5743852794 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.3965244172 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.6187707189 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3207762021 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5555389401 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4227752207 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6601015066 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3139195907 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5175917627 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3178080544 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5080472014 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5548034204 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.7071106777 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6432470265 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8278285651 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3223436235 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5379000389 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3061941236 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5918382188 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3294693656 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5673243159 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1938427279 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4526315895 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3412226864 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5602175563 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2910894115 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5672691361 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3297556296 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5925809306 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4619761505 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6734036273 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0307272557 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2077475007 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0256705679 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2168141904 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.3055790363 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5265498141 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1365241949 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4093204393 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1778454432 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4028796881 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2463440773 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5200026897 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3412169553 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.598919602 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3937102354 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6229439454 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2297889676 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4704431893 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1375213911 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.388908417 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2789946732 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5442737128 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3066060037 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.556064896 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3020610187 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5462026627 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.299038365 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.625735911 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1946429546 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4570475303 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2287931181 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4943000447 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.233903322 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4813311361 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.160985695 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4538812051 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.254215081 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5128620442 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3318074211 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5973973733 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1875636541 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5136106256 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3333377273 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4709407515 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2789250445 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.52614288 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2580648249 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.574708573 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.18097458 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4665455335 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.1950643939 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4676749835 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3008270138 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5432166189 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0884771533 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3265400527 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2431929513 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4397197217 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1999599641 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4781553813 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2497463416 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.5083726446 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2002123483 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2845065116 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0855626682 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2894501335 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0163561936 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2383002969 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3116845131 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5936722206 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2244694024 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.491879277 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.3230054961 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5636252799 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.2128915517 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4613197046 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2499065804 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.4673527976 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1281964384 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.4588308902 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.319035437 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.555554753 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2835968152 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5094572017 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2487969868 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.518708582 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2238578938 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4748109447 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3058774517 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5603224049 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3684068806 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6535736283 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2253225205 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4798221167 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2383027705 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.504994716 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3350990447 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5554923615 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2492753068 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.5052232921 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2733774487 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4886433877 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2525535773 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5869217143 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.1449724535 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.3965148993 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0942041621 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4155041047 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0235872225 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2475231508 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0365961569 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3050512265 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.2767258101 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.514091898 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1796236972 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4422888692 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3823229705 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6453681393 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3548387061 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5492435889 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2581682802 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5198796684 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2662027737 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.540420297 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3091555064 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5528775735 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3860807525 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6710753294 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2477037529 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5045143807 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.394880747 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6072982987 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2464304597 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5343201712 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3592456339 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5816925415 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1293187691 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.3738214096 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1003361282 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.3952274191 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.2486377856 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5019133104 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2573787999 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4669380076 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.257967718 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5080229639 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2025556713 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.3947833 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0690009512 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3268004816 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1294343719 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4480995236 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.1898460053 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4405765457 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1095645758 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4118027966 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2800732787 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5502272532 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4072726699 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6058201233 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2040949055 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.43974538 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2090628208 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5097240815 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2774768567 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5443247574 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3840976738 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6378529698 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2438930348 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5213646779 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.2864351463 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6249321785 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2312626914 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4818505098 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2916695233 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5596704495 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3464968589 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5970733128 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.2948086539 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5417485172 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1765221595 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4369131192 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.2151708901 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4392843531 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2555109482 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4905742401 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3470151937 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.512427307 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.1515985315 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.3742059137 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.1108390908 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.210084949 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.3319169877 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5670040682 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3882912951 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.6304381337 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2755215402 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5261420761 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3549575463 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5828055284 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0366134631 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1741100437 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0154547723 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1399251318 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2290327476 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5229250115 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.3122409611 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4921734247 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2244630159 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4814457852 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2007945741 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4705479648 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2150236607 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.4970978512 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3603191861 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5938509481 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0717040801 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2570725566 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0371460136 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.2645911946 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2230132444 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.49266873 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0804541385 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1593791779 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1516053677 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.3870787615 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.051561205 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.3871526823 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0693546179 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2831628097 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.1008748312 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2529276987 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1997334357 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4568839976 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.2322349452 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.3049134513 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2350235637 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.528086246 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2524231151 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3456705882 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1624728483 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.4496327865 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.2537223237 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5463123746 }, { "model":"deepseek\/deepseek-chat-v3-0324", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2402975983 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4194922076 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1338075038 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.3511060104 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1357614328 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.3566810684 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0294893106 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2512539061 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1512958639 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.3039734334 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0801602615 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.1225273024 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.1445854242 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.3358973891 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.1013154049 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.3155908724 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2238237549 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.4534366926 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.2689055687 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.4149416248 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0975874673 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.3248757407 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1178477307 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.2954831248 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.159326316 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.382299198 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1975279012 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.434347868 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.1616056325 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.333451919 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0517558436 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.1667611675 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.1822663929 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.3603211978 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.0853061805 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.2591570919 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.156849916 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.3691380603 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1100740183 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3479696433 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1450162321 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4059490259 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2328037895 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.3851910422 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1735533986 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.393764966 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1575864364 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3025413929 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2443635406 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.4676159664 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.2223182846 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.3685961254 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3180611809 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5065841887 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.2680634152 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.5259443653 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.1699873084 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4049081719 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.1497155398 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.3184438517 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2338536957 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.4766184042 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2064068309 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.3746311154 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.2633055293 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.451378667 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.2576565152 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.3874723625 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.262811264 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4775521011 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.1965649232 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.3326158945 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.3974650186 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.5373829936 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5868227988 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7574063883 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2163623393 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.4705179867 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.2458237388 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.426538099 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2533539434 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.4119435555 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1546982368 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.461201833 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.2168672818 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.4298161123 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2544899664 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.4510886635 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3182982487 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5389072956 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.47314841 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6884710951 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0355335694 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1603046868 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0146310492 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1847185113 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2054043097 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.3989649156 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.0395886562 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.1860957619 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1042634561 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.2700025792 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2494923018 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4977126554 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.2684001499 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.416230929 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.2827365983 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5128198247 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1483876396 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.3041127486 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0867728202 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.2892236166 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2451203581 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4667806078 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2680074322 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.4714945694 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.223787985 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.485186041 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3571004344 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6398491182 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1560495384 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.3475080534 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1427020575 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.2902772917 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.2002526169 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4138800613 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1417817824 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.3836219075 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2225226541 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4359559623 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2708679556 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.4908463656 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1711796281 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4108641598 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2194500975 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.3589526769 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1994883012 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.3942042616 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2570478693 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5215463463 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1546042692 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4407575564 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2471551193 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4847447773 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.273955881 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5078342939 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.05512322 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.1415530353 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1622823381 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.3112906344 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.0777950259 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.286449259 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2242267538 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4739719705 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2193704377 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2898139055 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.1346054696 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.3409559995 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0394802393 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2573657649 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.1819054463 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.3169358876 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.104449722 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.3335661802 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.1790591986 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.3273464644 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0709002184 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.2564650613 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.1088802366 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.3340713822 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.138140887 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5001663831 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2496973594 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.435485932 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1379373956 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.2513871995 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1828389227 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.3179756072 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2129586558 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4314516197 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.24401684 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.4592926922 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3739586622 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6802015628 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1237069224 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.3000426144 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0752750224 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.1972354123 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.2657383448 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5050071583 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1687285867 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4021301132 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2202972405 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4092623804 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3159175655 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5588876314 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.092498489 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2505889593 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0281527677 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1893859434 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0257539048 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2236454943 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0203648136 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.2172604464 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1894457708 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.380925147 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0698032229 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.2623376551 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.252814761 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.4131775231 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.2671823746 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.3966391033 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2278356993 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.4590175615 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2772231531 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.4560149918 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2348823133 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.4500923911 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4182790857 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.649967582 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2343936577 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.4827310176 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.3388526407 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.5454578721 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.1839314203 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4234191674 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3489148579 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5567945257 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1402474958 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.3500619576 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0777850092 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.3273785033 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.1949440941 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.4275372517 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.1720625024 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.387179761 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2272550261 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5004185979 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.1082324911 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.2676221295 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.122708093 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3763985899 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1795400131 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4606246254 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.1736253216 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.3845743827 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1212907088 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.3790107218 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2260669876 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4794686178 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4126611726 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5967801454 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1563243249 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3099234307 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2041414382 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5298340938 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2570489843 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.4028888696 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.2324980283 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.4203934844 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2041105012 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.3657796945 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.3130978532 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.5950962977 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2525982324 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4605822105 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.1739645144 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.3984988334 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.1200373123 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.3196364935 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.1814754432 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.3157059838 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.187751348 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4405758845 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.2191046369 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4677894227 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.1984127492 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4104368787 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2929382742 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4500167319 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.1223581489 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.3679278604 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0119991714 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0656655661 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.264346972 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.4993975063 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.31801505 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5317972494 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2705929623 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5134621473 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.256831195 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.4554987689 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.0897426047 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0174118264 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1924672099 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.181272453 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.386776605 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2603415771 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4241396601 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.1963447008 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4236533517 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1559833307 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.3946264183 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.1705385375 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.3747437419 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.257036702 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.4598854693 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0825950269 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2748258429 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.039067574 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.2124733373 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.192705772 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.444563462 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.078814153 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1049283878 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0644184223 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.2244706008 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0232220251 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.2052740772 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0620084814 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2964981916 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0414688547 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.1387297621 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1352563368 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.3763213166 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.2591777223 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.3119832776 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2137844239 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5028557922 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.3129908127 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3710290799 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1943812143 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.4323832185 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.094824393 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.2713939288 }, { "model":"deepseek\/deepseek-r1", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2072083108 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.494326253 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1112235198 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.3299787275 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0771449577 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.3001544411 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.034106218 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2505188758 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1799007611 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.4327545103 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.1112135368 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.2588501418 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.1978004928 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4602046776 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.1107790987 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.3358115304 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2373672543 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5260757727 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3285274303 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5590237808 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1460355551 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4052234374 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1252913378 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3214539752 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2406381299 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4810229449 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2249075936 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4437585001 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2160501071 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4706714315 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0795746221 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3174359519 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.1884259335 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.3764386215 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.0405514883 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.171412569 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1808592893 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4337443828 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1334340896 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4279306348 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1870340741 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4752318502 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.1863630148 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.3895554099 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1603232803 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.3829882205 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1004543306 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.2090205571 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.251982914 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5269588388 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3081294684 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5221618044 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3414011031 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5397513615 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3050701984 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.5811833775 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.1987665104 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4807089369 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.087795256 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.3788573069 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3390983713 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5924658961 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.3162103957 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.555772337 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3226520344 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5750653902 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.3833943767 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.5822540388 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2910030635 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5391676429 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3135062284 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.503706011 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5414890567 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6901603131 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.65136344 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.814288256 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2479212607 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.4998884286 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.2970650759 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5373052889 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2793086929 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.4720611769 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1318325912 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3864569881 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3666373087 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5839902989 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2824386471 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5388064333 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3186845256 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5933794038 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4349494723 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6079740627 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0254287526 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1905763319 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0134279826 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.139589465 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.1773927146 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.3630036378 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.0949811313 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.3340540429 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1872829527 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.3848483899 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2170056607 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.485021658 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3564149867 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5972656918 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3029237977 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5276781303 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2011905527 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4093497027 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0276458775 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.1587376386 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2716919376 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5367224263 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3302090182 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5714655622 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.2876911945 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5482159609 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.2940448188 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.5448923741 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.176124281 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4444880058 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1541945773 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.3924489747 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.188709393 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4084479035 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1138525523 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.3577532211 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2897223986 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5436301176 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2571014471 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.4731076434 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1827387853 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4769620326 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.0613359658 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.2105103816 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2802837747 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.4893845985 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.1179961209 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.3697436656 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1795230257 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4524836975 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2168596976 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4205029389 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.2464293328 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.4995807582 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0931324834 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3308191122 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2346057729 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4744017815 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1793887241 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4368728644 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2249815138 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.5077874682 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2125650621 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2904132435 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.1190564309 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.3143397764 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0335265947 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1851037404 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.262210271 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.4965376896 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.0388474827 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.1762583779 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2204139642 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.45446535 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0629224316 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.2969811617 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.1410564792 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.4583101239 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1751098097 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.4545492979 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3249125796 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5545635633 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2625209874 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5042437741 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1520275352 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.3905524229 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.0999041852 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.3246573528 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3358829505 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5672747548 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3376198793 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6603531936 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2178007242 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.46765621 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2366406548 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.5016732556 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.267593114 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.4875681627 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1669756152 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.397363268 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2018833256 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4087945979 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2219340338 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.4586860241 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.1638168729 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.3734737626 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.065274449 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.2801240967 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0385470126 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.298290272 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0422110832 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.2965714462 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.2703816733 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5070503073 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1229430143 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.3831192143 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.361635926 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.5409988692 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3917357782 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5329271965 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.26251395 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5352694678 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3264296122 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5528035231 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3390879053 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5804013742 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3506563803 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.5763378703 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2742281839 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5442712896 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4398120524 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6249092429 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2246840158 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5244113055 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3594079605 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5726823578 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1637669376 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.4551543683 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1390837831 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.3946845887 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.1351642547 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.419979284 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.1783822717 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.3420045131 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2141579133 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4737980192 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.1530398832 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.3776796544 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1442994366 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3828616588 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1186331454 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.3757270357 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.174243664 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4076535095 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1409080164 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4221024153 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2784378348 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5549743668 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.3456876809 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5621225861 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.20948768 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4445988225 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1573888419 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.3946820302 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3413304111 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5865784347 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.2452155196 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.5046140378 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2074916818 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.4529002152 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.3320288719 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6201778863 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2510503336 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4902642703 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2210031154 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.4920579152 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3219720359 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5542555367 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.2813682269 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5156714123 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2423449507 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5073629744 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2856584071 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4617854306 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.1265658126 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.3696106678 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.053891041 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.1488881792 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2862677011 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5325160504 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3443407299 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.6019992774 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2507417591 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5348282824 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3383254859 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5585953363 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2049672714 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4409541903 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.291136983 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4722890493 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2144441644 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4937213115 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2635775721 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5351905044 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.290841669 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5420316418 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.4456194272 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6359307779 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0578608568 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2953456626 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0207182972 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1887439492 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2592635841 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.5029886217 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0439234831 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1458942547 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1079708762 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.3233208549 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0349481063 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.3674901842 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0592346231 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2924121066 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0728848714 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2476630291 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2131332995 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4719991036 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.117107351 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2063865989 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2199013913 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4962775874 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.1813558493 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3079482476 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1703974411 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.3893360635 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.2345318252 }, { "model":"deepseek\/deepseek-r1-0528", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.453746534 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.311563429 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.5377881998 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2396682484 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4396755163 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.2166814512 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.4841980873 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.1410606918 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.4325882329 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.278130538 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.5385292979 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2384630665 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3608782934 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.3417210123 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5899435952 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2808352256 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.5279437548 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.3412745465 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5826925715 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.390058702 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.597901367 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.2090093301 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4788545798 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.2182033882 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4418555529 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2884896124 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4868568502 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2753136771 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4912165901 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2570929124 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4913361477 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.2251696435 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.4568625878 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3669654657 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.6010168651 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2158729847 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4281994918 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.2496588936 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4813242802 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1805770029 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4382698967 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.2061377509 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.5025518062 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2617461401 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4657078066 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2478040527 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5055046978 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2299760176 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4382930134 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2988263355 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5680625724 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3895386095 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5693830083 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3954134409 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.6048442845 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.4087786463 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6570148202 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.3123005535 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.5830505467 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.335884194 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.5760217609 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3897427607 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.6211463161 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.3989536756 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5939433432 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.4245142301 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.6390046108 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4937713215 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6856946146 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3112735556 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5352584968 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3040377019 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.513213381 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5700014681 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.7211557733 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6711231356 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8279616884 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3344072959 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5751886204 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3905772718 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6166676981 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3438075851 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5772164708 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2647913841 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.489934723 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.4465199237 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.6601536062 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3429301939 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.6176510545 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3411643102 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5823324237 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.549646027 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7334440232 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.020521917 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2363991566 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.038851639 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2330289804 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.3431336081 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5478126679 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1775808906 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4530128509 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.2923469792 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.5018131258 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2792533669 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5455037681 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.4434404142 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6607832834 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.4308263411 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6353494719 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.3988769415 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.6179037667 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1507190186 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.4178703457 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.3268473222 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5875784187 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3853964262 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.644268597 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3626330278 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5869542517 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.4368014195 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6964617832 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.2899610129 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.5189509889 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2768774018 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.5137444144 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.3472094652 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.5708345321 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.2724432186 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.5503510798 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3422169639 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5695643744 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3590717293 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5878176707 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.3331002174 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.579659503 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.336969824 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4913494739 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3700621486 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5837249923 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.3088767184 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5884562937 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2509895727 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.5504416013 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.3061931512 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5755019454 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3918191618 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.6170794637 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1880645416 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3989811117 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.3364506705 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.568444139 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.3756915681 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5684194735 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2805488398 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.5298229011 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2739358937 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3591923755 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.1644396626 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.3700254848 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.1334761711 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.430142277 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3953125797 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.6309277281 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.3149005177 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5121389124 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.3477763054 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5972164517 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.2678562502 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.5016256548 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2950679588 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.5434984641 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.2063059259 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5248165256 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3735487132 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5985929462 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2991127987 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5530820193 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3338185652 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5787491818 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2806196555 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4516145469 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.4099406931 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.6343459464 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.4650961929 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.742377276 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.3081644584 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.567441399 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2529515223 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.5121166935 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.4035620418 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.6298286173 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2518010194 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.5019977224 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2807966919 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5304954689 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.4120530736 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6593260342 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.2237313135 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.4999468628 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.2341852741 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.5553049856 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.1546333274 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3730093916 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0699361494 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.4093787348 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.3421886958 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5848387431 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.2784394077 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.5108902329 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4372599799 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.658993109 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4174871385 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5931595705 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2722413511 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5398004754 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3757453667 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.579529149 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3198419121 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.578067895 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.5056809967 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6966172136 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.3154573291 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5600439488 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5650725553 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.7127688163 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2839962776 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5461994333 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.4589390255 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6469214364 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.2696305636 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.5244991633 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.3319598047 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5861214096 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3972794455 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.6232130593 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.3415088181 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.5390152372 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2864590726 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5427330367 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.3306843079 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4979151965 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1795249187 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.439139386 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.2284701542 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.5463949389 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.284867651 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.5244275819 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.2202968782 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.5048544071 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2893867971 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5578776437 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4400648152 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6115459213 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.32232697 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.5111738773 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2383157401 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5175978358 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3617201239 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5926536872 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4685613196 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6911608485 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.4043048116 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5944297519 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.499303533 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.7212747243 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2819090872 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5410426012 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2951438389 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5712604905 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.4243816819 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.6336752528 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3644641049 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5789291672 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.2251503741 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4903696181 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.2873179741 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.5146227404 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.3047162219 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5499065244 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.4136384398 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5513959132 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.2867986153 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.4858296113 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.101641187 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.2278398804 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2782856704 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5422402953 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.419062749 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.6455051222 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.3049575256 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5755673238 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.4056417811 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.6001236102 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0390495324 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.2184695701 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0636422012 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.296360163 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.31244285 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5630479496 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2911146353 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4826415387 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2813736334 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.5350839669 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.3532409319 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.6242374823 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.3436662566 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.59026429 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.447007323 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6624838094 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0835563722 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.3389944349 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.1150505644 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.3347031666 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.3135484165 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.5406963315 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1901478921 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.2245996516 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.2533217863 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.4979177365 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.1440335108 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4965120865 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.1607554286 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.4105347932 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.1645681144 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.349083777 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2392191989 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4953232912 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.2147687469 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2691081013 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.3075560511 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5721434339 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.3628421643 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3921364269 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.3475884805 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.6151377801 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.3045492612 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5663641807 }, { "model":"google\/gemini-2.0-flash-001", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2499368982 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4866856816 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2032851597 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4298526595 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1425831777 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.417812484 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0942003912 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.3936431694 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.2720072911 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.5061425801 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.1852281481 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3131355766 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2661119281 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5602064313 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.194000916 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4535351144 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2723646085 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.523730035 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3878873306 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.592059186 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1616972069 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4328883781 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1751707508 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4233167915 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2181300738 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4481685644 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.215871503 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.454646328 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2489741176 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4653663899 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1897550206 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.4113322824 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2828158417 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5179193605 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1666719193 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4162476616 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.2101918607 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4594084738 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.171792145 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4130816559 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.2000799381 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4988072587 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.3128559705 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4876970107 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2526226356 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5007947756 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2513374937 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4492954272 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2672489266 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.4952818431 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3639249804 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5538942616 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.4195422704 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.6085776274 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3993771419 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6357624414 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.260213991 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4864597898 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.2793994268 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.508602676 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3077322035 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5442587621 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.3445618242 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.56756333 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3382313883 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5705047774 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.5354923841 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.7125417889 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3210907235 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5292167329 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3443988404 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5424093748 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.4617498931 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6321211549 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5951204739 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7613397345 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3547349164 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5728186386 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3576086442 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6061019948 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2676796828 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5663114288 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2591969049 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4814104167 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.4277064952 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.6325003325 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3269491361 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.612989238 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2978444639 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5625976718 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5244850062 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7081676298 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0361796916 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.195120399 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0308475815 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1890051706 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.3150102324 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5340625085 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.2069349026 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4761478869 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.218966139 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4181042336 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2395753169 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5013466224 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.4286739426 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6359453866 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.4344801023 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6474630132 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.3063272533 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.5583492368 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1757166109 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.4152865266 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.274174318 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5398050773 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2927577916 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.56675535 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3126497326 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5733116043 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3882402101 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6624914478 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1503374245 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.387578878 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2586477386 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4926307711 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.232232342 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4479000894 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.2119661542 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.5179791668 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3116016155 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5537049588 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3440495149 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.6004337743 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.289402493 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5452343766 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.1879084772 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.3787664659 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3561823456 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5619277442 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2609272138 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5344454302 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2053088231 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.491430616 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.4063809808 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.6167229896 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3195578223 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5643490342 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.2319063764 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.4461032467 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2485030856 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4688750005 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.3577614115 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5550425425 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2106519512 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4804338475 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2003821738 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2850409306 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.1057085266 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2715159938 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0708970987 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.3583242702 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3007758183 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5378616534 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2795823496 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.510712197 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.212326315 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.4808320621 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.2353530898 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.498918765 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2180528106 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.4350629264 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1752962142 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.4941543502 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3848912948 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.6007084641 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.3023908744 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5415851472 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2906692433 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5058194876 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2450137243 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4736891146 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3362256422 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5937301471 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.4506461939 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.7014575648 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2814491551 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.5513455988 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2724659132 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.5135188138 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3218775268 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5549355351 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2676260775 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.5006126727 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2807256089 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5254117929 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.321808319 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5984422632 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.1741946793 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.3946218629 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1729000095 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.501423047 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.1059783758 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3306681685 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.044447102 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.379164388 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.3044074075 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5411406602 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.2312884216 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.5063263205 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3792889323 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6001893022 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4267930078 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.583867208 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2910351794 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5387220968 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.352446244 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5847615168 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3275326122 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5672682921 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4390630519 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6588223005 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.3171625415 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5772460535 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4778809926 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6619427768 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.266439807 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5101737799 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.4085478092 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5658655698 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.15500158 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.3976950814 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.2871474268 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5626056331 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3431093451 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5329383828 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2838954554 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.48412 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.28322243 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5207906875 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2943621784 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4749986301 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1334599567 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3857630332 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1122823827 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4635739463 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2156343628 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4198074415 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.2207115321 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.5118672399 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2933759432 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5710698033 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4425485023 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6215234533 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2152161054 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3971586695 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2374945192 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5272097328 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3335095456 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5683382887 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4033731642 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6519271741 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2763965367 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.520411275 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.4350912598 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6853284539 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2415112532 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4752276392 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2730244696 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5683000198 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3528600199 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5850279702 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3231474889 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5470305035 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.208524293 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4271785057 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.2278942993 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4700484002 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2681599618 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5133569475 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3973895019 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.564303282 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.1768961884 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.348072496 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.1310924895 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.2594660098 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2806311806 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5431910468 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3916346072 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.6289997941 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.3007101794 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5519617104 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.319789332 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.540431455 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.025133561 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1190888288 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.018289241 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1529779827 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2859928961 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5295533045 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.3010091175 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.493353195 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2407351505 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4734226532 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.3363563156 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5891323556 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.3170290484 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5491010642 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.4032471641 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6394566771 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0638721925 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2417596357 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0586926041 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.2518105026 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2746262088 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4924092277 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1648044562 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.2037009528 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.2251844915 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.4461128395 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.134895078 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4836355083 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.1141869405 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.3128047258 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.1341626528 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2926500144 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1826865099 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4272648905 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1499743312 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2347308661 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2557280993 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5212143675 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2610248692 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.314329989 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.2674818373 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.5139550602 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.2723879605 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.567169258 }, { "model":"google\/gemini-2.0-flash-lite-001", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.342116281 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.559017125 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2612038772 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4610162591 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.2362598693 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.4567970323 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.1787109448 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.4308196228 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.2883756135 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.5452929372 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2563045907 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3587997566 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.3529391424 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.6061230642 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.3735935027 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.6118752881 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.321926202 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5672345783 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3971969927 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5997335085 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.178647434 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.459184816 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1766325657 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.42829263 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2689181562 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.5103302194 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2712387895 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4721454199 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.302725237 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.542445303 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.2074435657 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.4358785934 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3306584572 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5470737398 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1839015438 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3995221223 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.2402619776 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4816842061 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.2017479595 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4438028104 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1964434077 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4658109118 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.3578761246 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.5250650323 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.3118354834 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5301057957 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2712094702 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4554148161 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.3106246769 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5720599098 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3532786899 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5492979392 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.4167458111 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.6414773714 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.4574369641 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6642298649 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.3171272191 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.5378911972 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.3576320675 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.5996552124 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3763167038 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.6069984198 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.412011399 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5948875971 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.4097540667 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.627572506 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.5159030608 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6896498523 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3297475202 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5587828835 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3865296224 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5630460332 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3392082462 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.572145108 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3956672126 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6257285571 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3764428485 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5794911256 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.3019679958 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.5294391762 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3967317865 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.6195138455 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3512547173 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.6404359092 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3566291662 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5901709379 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5810870953 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7402195597 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0654195918 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2431272498 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0469027058 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2599881115 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.3528703899 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5605109395 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.180119873 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4654772276 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.2852268084 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4820918601 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.3191497109 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5714009602 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.0284513224 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.1002460472 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.139929051 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.2620347708 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.336133928 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.545638091 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1683321583 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.4261207547 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.3151646581 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5599039863 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.4642022823 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.6720003623 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3580586993 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5973680765 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.466208483 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.7131031141 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.2779082008 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.5108078595 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2835120188 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.5067796001 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.2679844764 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.5037933164 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.243508886 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.5329216971 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3211161163 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5700208797 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3654943432 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5949572053 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.3652971414 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.605823173 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3680693686 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4700002965 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3627331006 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5928422481 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.3781416178 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.6275555619 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2553025069 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.5408614418 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.3062994849 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.58504635 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3700483899 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.6006134318 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.2315452529 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.4617416997 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2885658633 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.5512502051 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.3602561303 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5568929694 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2490831768 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.5186737173 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2780296298 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3546301665 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0591613845 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2662007935 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0779434955 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.3095367271 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3908377774 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.6260985434 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.3130016025 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5391710538 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.3471192 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5771683143 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.2207082106 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4870892013 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2993063676 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.5474937127 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.2153101678 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5379094165 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3532448793 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5657281022 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2774888867 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5389145892 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3233458031 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5703875576 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2274584553 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4665758709 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3746934831 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.6046609636 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.4734156929 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.7284733826 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.3552781219 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.5977013775 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.3211140622 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.544805929 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3774439938 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.599804205 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2861323795 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.5180107937 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.312116976 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5463170004 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3780530389 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6232733213 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.2177968416 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.4415294523 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1367315108 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4585024296 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.1871006972 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.4365658925 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.106229994 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.4277182017 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.3701266209 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.6213278685 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.3031726243 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.5214945108 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4366998679 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.657751239 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4527636476 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.6078708965 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.3170527901 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5593259189 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.381029585 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5993345379 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3912183043 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5915454866 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4626670594 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6755172019 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.3285564053 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5835489949 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5863602394 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.7345305045 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2800732142 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5572474453 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.4692280866 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6613842883 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.2768738298 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.5136190092 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.2743310586 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5596031593 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3271104301 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5357780664 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2320778637 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4100893183 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2632131459 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4981476408 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.3150668549 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.466344362 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1425864886 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3921687091 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1876717865 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4923376927 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2921978364 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.5121729513 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.2437506181 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.511793128 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.3070769379 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5829431146 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4562210568 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6349957477 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2923338131 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4917795718 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2448808161 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5213243396 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.340913979 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5878242881 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4428192719 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6769035337 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.3616286251 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5800788406 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.4820281618 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.7239645292 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2700100505 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5372825559 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.30759425 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5725597295 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.4465523529 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.6649928543 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.4129762096 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.6159040363 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.3236889282 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.562821135 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.4114706745 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.560767027 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.2476296934 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.5033069835 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.1179117378 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.2587205011 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.3291780472 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5900439285 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.4285247051 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.6508035663 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.3407035036 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.583433778 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.4167194618 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.6135073244 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.3449466128 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5552120384 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.3046252906 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.493749829 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2583582755 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.483302551 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.3579243963 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.6083657804 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.3536805169 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.589790723 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.4655851302 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6703129046 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0932068478 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.3792197219 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.1171143464 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.3384795969 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2793739299 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.5145166794 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1321430026 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.176594989 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1979226992 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.4588070152 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.1181506898 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4718021868 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.1110331374 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.3622261661 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.1764819134 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.3463753843 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2753706711 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.5198869679 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.2207873686 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2915581098 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.3429319501 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.5895603725 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.2713803282 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5773152323 }, { "model":"google\/gemini-2.5-flash", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0532606841 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.1073712755 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0366084106 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.0918534276 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.0152635235 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.0888307029 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.2298187784 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.0558323892 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.0713257426 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.1072693099 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.1463494979 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.2497593431 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0094322191 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.1037916124 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.0001965409 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.0267044753 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.1125538275 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.0146450668 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.0694240797 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.0347588238 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.1419212765 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.0582570743 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.1132092265 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.0098853623 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.0165633766 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.0288866262 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.0765030508 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1414592386 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.2511168982 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.050165656 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.059692947 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.1732325986 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.320908965 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.1653880539 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.2211462144 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.1467739974 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.1754458302 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.0507024887 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.1030268479 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.1697494021 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.2799880729 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2781125894 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.4200022747 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.1216206159 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.2069979707 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.120693374 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.1968497699 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.1189162738 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.1846074997 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.1307394464 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.2486210965 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.3167547043 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.3513933765 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5455939196 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.5713088936 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.0379503072 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.1087301231 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.1171367611 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.2647620406 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.0584383584 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.1412915198 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.0396603748 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.0985321352 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.0985964312 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.156061678 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.0702762868 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.1586154477 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2019995088 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.3236372397 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.3648977534 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.4646316658 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.013121921 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.1524824027 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.2518370758 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.0406782903 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.0604419015 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1183279848 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.1920673939 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.1077307738 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.1927713334 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.2445177715 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.3496977746 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.1662382153 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.1808826046 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1043372044 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.1458478186 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.021727044 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.0580949052 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.0418300745 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.0806441203 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.1378617741 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.2350595049 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.0504154457 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.1355433195 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.2636104799 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0732503288 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.1225115139 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0583064468 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.1449618078 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.1541511534 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.2680259178 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.1272530801 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.2851760515 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.0473708874 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.1079640005 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.0283865781 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.0449505586 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1349286875 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.2409523809 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.0677858867 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.1661259662 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1074769757 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.1755162217 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.1024243094 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.1478558086 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.1405067201 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.2106207596 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0416456555 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.0990623031 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.0795987945 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.1674316707 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.0674985081 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.1154928046 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.0672103499 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.1538606955 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.0731686523 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.0985812466 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2208894183 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.3047935907 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.006336512 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.0186059462 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.094505426 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.2017400541 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0141271464 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.0334534153 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0867381827 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.1588310511 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0055901791 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.0899799957 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.1294816588 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.2140376737 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1096418767 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.1484602611 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2123460731 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.355254469 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.0244791954 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.137921948 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.1020473557 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.197831409 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.1388655603 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.3318625881 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1641407036 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.266680691 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0091866723 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.0286785733 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.1394131915 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.2765922512 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.0483965296 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.1060232209 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.10034493 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.1553807871 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2797145372 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.4388238124 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.034290559 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.0746115811 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0446825714 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.2751100361 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.035737951 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1320906069 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0471165976 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.1649158656 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1717883762 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.2829924006 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0473366133 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.0472659921 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3123594633 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.4403827935 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.1719687661 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.1882752522 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.0749300244 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.1200920019 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.1830386377 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.2502216632 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.1361030105 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.2437066717 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.1732366992 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.2953051043 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.1191095692 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.2528032318 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.1851411977 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.2745749365 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.0328251853 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.0948529778 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.2486142719 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.3250953964 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.1260552814 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.167920313 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.1569836743 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.2025766659 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0375021678 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.0518133834 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.045375844 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.0834549749 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1243674491 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.2280537353 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0291939407 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.1082649083 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0370853459 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.1174899825 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.1757682146 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.3008766306 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.2082330564 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.2618628182 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1395902324 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.2174681725 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.119440034 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.2479022501 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.1807911166 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.3244473544 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.1337368217 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.2173463535 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.0167265048 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.0601986184 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.2110013881 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.3419482007 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.1301054745 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.2254627309 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.1643043557 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.2361420263 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.1225039269 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.198958675 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.0931461339 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.1419037126 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.0329651295 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.0924755182 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.0726933467 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.0966899881 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.0627836379 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.1537747644 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.2746649389 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.4031644529 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.1060274283 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.1886969167 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.044287435 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.1034692205 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2528660307 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.319204417 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.1044041173 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.166665052 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.0152843146 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1294358837 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.2479602917 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2169787191 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.3030152567 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.2322985671 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.3328943549 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.0600208887 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.1457445652 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.0006881025 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0093430764 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.0639334201 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0290365467 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.0799597164 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.0058978605 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.0483361134 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1308987845 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.1738613828 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.1669728523 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.1905927635 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0414601372 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.0662245232 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0085456627 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.0621750153 }, { "model":"google\/gemini-2.5-pro", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2073802913 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4889223975 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.0840656979 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.3453561943 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.120094546 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.3259782194 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0974181135 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.3477814679 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.2393172056 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.4971254293 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2089212841 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3406916002 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2712045148 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5477096036 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.178052271 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.468064885 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2747843596 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5519960681 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.2863967069 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5318173199 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1096694862 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4291604898 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1630720543 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3952400339 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1892846534 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4212342522 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1938470016 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4527968539 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2094379574 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4509809217 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1931386564 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.4233010233 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2957522582 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5232039352 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2308361669 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4087255612 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.200456445 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4226152307 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1414132922 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4170843853 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.18522743 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4467570037 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2590661095 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4657468506 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2663307677 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.519985227 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1913577407 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4064669591 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.271237739 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5173954387 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3067537945 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5194482945 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3694979709 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.59081536 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.374702944 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6019503341 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.2792699678 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.5157552806 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.2334415639 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.5128705295 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3536861453 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.6024608455 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.4031829559 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.6234553711 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3189602129 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5548503533 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.5229096392 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.7023434262 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3137252517 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5147981205 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3302929673 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.505425141 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.3889146477 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.628092835 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.4660772497 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7280386297 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3593767686 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5668073679 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3662275621 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6102640711 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.286051969 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5373856549 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1816947237 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3981159206 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3579818144 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5889481625 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3403832088 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.590264879 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3180384008 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5571267732 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5778354146 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7490356238 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0380719948 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2066039108 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0259757351 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.147148937 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.34811918 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5482709715 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1618983325 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4411905252 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1925315551 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4032389241 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.1992720083 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4905441802 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3722751955 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6207213131 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3378499277 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.554090013 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2879989689 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.5083598943 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1719225434 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3992950999 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.3234067809 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5706707095 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3187264685 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5856828402 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3583744222 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5732194975 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3364664006 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6438910651 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.225000401 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4567066441 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1941055199 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4504811493 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1944477164 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4517028309 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.2035517344 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.489419705 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3074361781 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5178180754 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3505959215 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5955060476 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2581140706 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5395853617 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3298839393 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4471547552 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3177915441 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5387853038 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2549228547 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5322440265 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1983700044 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4843458319 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2920207746 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5345155349 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3702042307 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5776853975 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1498433716 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3652702605 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2858443353 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.5499221943 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2481102245 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4829685786 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2305492704 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4826740501 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.1746024172 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3073554703 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0913600379 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.3305636235 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0269728382 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.3155017027 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3751831337 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.6085851316 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.259988405 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5046714005 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.306099972 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5370842801 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1736022871 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4305653856 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2826629018 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.5215979873 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.238462643 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5334745774 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3161992509 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5479755911 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2133071404 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4660281027 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2370074805 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.4804215458 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2399769139 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4726429935 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3104483533 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5705763492 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.4229626959 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6856510383 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2227645269 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4888582617 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.1869632744 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4322398057 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3115387303 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5342290246 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.22081567 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4878836055 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.3125704924 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5397676594 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3586968371 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6075205554 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.1901221224 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.438728736 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1330024304 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4711022084 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0258426139 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2237241232 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0460531144 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3418147419 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.3714452662 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5977153904 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.2234825764 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4562477173 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4180718844 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6426219278 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3938693136 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5573992167 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2456102401 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5190609119 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.4265619216 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.6320824157 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3537745123 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5904429929 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4588664196 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6844540285 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.3139442337 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5741447282 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5016049999 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6788048008 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2449777422 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5268764903 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3789708434 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5790333031 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1839360587 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.448997409 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1891835724 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.508623725 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3352727297 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5583215205 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2009000601 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4004383195 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2297304995 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5040607132 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2136543311 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.3916393466 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1221415503 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3516954503 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.108688779 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4515663403 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2308889646 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4618048204 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1667302795 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4717296026 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2351861569 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5329036218 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4168384094 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6032787874 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2291561983 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4673987803 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2036733766 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5047620958 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3500384253 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5797456052 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4268868445 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.664863412 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.3005035588 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5210660172 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.3401968092 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6217197146 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2662307086 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5053585639 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.3044345778 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5714036731 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3330093484 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5694168709 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.257812168 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5167002436 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.2176170344 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4496406258 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.2605536967 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4816066849 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2992360169 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5585599708 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.4269079012 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5719718715 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.2042995208 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.416626147 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0833250166 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.1982489294 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2832304201 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5467240003 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3273464288 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5827048506 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2813742416 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5413704266 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.4063054094 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.6020718231 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0495917134 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1711087397 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0358872001 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.2761667256 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.322448107 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5887654616 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2148139783 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4226865444 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2829644119 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.5194956482 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2911955464 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5560139888 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2745000434 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5206422805 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.4261790941 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6358462464 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0696458062 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2461140434 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0728989985 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.2267265908 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2072386748 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4628288648 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1471425714 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1971299212 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.253783308 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.4487387303 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0662544821 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.3784904721 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0887390501 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.3201148841 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.1178050815 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.307433063 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2253512269 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4949150094 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1897306863 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2561574259 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2201641871 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5051068628 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2508351517 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3110461024 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.252616884 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.4822778382 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.1940901676 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.4661416094 }, { "model":"google\/gemini-flash-1.5", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2041309024 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4630820951 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1453469275 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.3874336138 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1059711376 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.300568481 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0110669593 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2141540563 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1906134629 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.4467868389 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2001643223 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3225170104 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.1876459632 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4830875841 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.1830944017 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4175337587 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.260683336 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5383651277 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.2962406565 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5190026627 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1438491224 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.418499848 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1044262978 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3577242047 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1675595946 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4187188467 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1545869288 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4031218248 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.1892328534 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4434206925 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1070430926 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3318636339 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3180630074 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5401606876 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1993490206 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4215901923 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1604267099 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4139767864 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1651025864 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3978212407 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.19213953 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4512512424 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2406657525 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4536513075 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2519150677 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4966963131 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2057435019 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4016427491 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2183929994 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.4877941086 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.2700916391 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5150577414 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.2900668497 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5379961095 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3193377157 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.5978978692 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.2457083208 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4937183307 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.1424911854 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.3546559531 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2481120403 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5339550423 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.3131426524 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5548197404 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3440655166 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.563902418 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4305522274 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6477508732 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2080428665 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4376921278 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3106300811 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.4971105137 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.4113463435 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6260248317 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5310035709 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7595845064 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3031284355 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5241309352 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3289699508 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5811203167 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2483616515 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5338391625 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1651229998 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3923963113 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3057177881 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5523945263 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2711892461 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5802332073 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3349110908 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.564806297 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4574014191 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6626552528 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0488154154 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1944904286 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0115014356 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1082073343 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2954497906 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5086877895 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1582270271 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4201411039 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1697291765 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4073157654 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.139672818 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4450194819 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3184042229 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5712698408 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3659029431 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5796195236 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2791185419 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.5226615992 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.145820804 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3820953887 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2313008892 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4915348458 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2765024802 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5261755337 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.2301748885 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.4682741896 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3346592082 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6514874668 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.154261694 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.3957095627 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1177946719 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.341868335 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.2239397579 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4512212104 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.135748348 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4408716957 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2724260509 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5200202435 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2753225284 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5457466615 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2664966821 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5386982677 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2651365589 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.409095006 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2326358655 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.4815897231 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2187928356 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.4896578943 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1666068635 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4554883841 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.1547742726 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4534139462 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.2647824193 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5269086196 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1635334444 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3431273828 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.247746183 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.506339637 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1775009719 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4300321597 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2402657185 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.497198112 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.3100527074 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3696197774 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0872330227 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.3059813913 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0010116202 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1893341465 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3447519877 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.578789784 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2508560655 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4987822313 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2732982319 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5233285219 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.137657899 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.3935929024 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2082275626 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.4639776287 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1350252624 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.4834543859 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2600500491 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5029669853 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2091322046 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4711774201 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2256246926 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.4703189943 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2254492518 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.447826525 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.2986303081 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5546917725 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3680194341 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6778287705 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1979480779 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4791457508 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.1536786708 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4315811907 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.2955515679 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5399574649 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2350766648 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4890671168 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2583853642 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5143387984 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3585971813 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6255063069 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.1562574059 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.378833839 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1000795039 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.447037349 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0981161875 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3370208163 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.040931235 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3615428475 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.2626677598 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.4970567085 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1807466012 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4219189716 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3706063992 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6167676482 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4234596823 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5629443923 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2754265608 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5207065369 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3700040895 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5924241261 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2971403532 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5321068893 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4364286549 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6509885745 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.27702997 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5437386483 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4580925611 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6514836722 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2300270544 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4839384065 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.4236492288 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6116207052 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1602143293 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.3793757948 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1370228414 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.4797772284 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.2571204202 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.4946608155 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0869374651 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.3119061498 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.1981443603 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4536105905 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2278086127 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4013315084 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0939343156 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3129229613 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.074740365 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4045773842 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2273071628 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4497946959 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1824497409 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4972329945 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2541965029 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5234491687 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.3308712415 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5512495988 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1522391036 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3508292995 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1514782919 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4909144205 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2959760233 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5398896148 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4357891553 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6529723913 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2501435914 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5088299265 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.262372343 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.5806899403 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2450100573 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4918691312 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2434733519 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5120095348 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3568851036 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5825326367 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3112091725 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5431414206 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1741933649 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4272342177 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.1811584685 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4001890626 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2544201673 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5081271409 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2942923294 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4479604827 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0881111208 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.3173214379 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0384668791 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.1259439982 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2563119866 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5291012922 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3009595898 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5854044281 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2567288533 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5177571061 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3201007033 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5323037228 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0115980217 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.115883071 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0129780747 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1450749981 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2402951661 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5033005385 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2302239803 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4066956434 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.203750264 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4979829233 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2162945849 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4941278712 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2537752957 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5073147534 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3583753747 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6253917282 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.1081430594 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2665454299 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0142970887 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1489810124 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.1700904158 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.421111634 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1213993524 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1655788185 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1531171972 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.3828830786 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0483942569 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.3116951706 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0816098185 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2781732759 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.041496472 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.202397124 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1783312983 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4423885999 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1632112014 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2297357227 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.249810194 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5151255506 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.1854861198 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.2463326959 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1729786376 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.4189697233 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0866404913 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.3943277627 }, { "model":"google\/gemini-flash-1.5-8b", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2177971147 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4738076987 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1414064724 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.3965739567 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.076595229 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.2493366365 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0440715947 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2820233612 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1913062339 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.4296053228 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.13443556 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.2528930204 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.24265587 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4918380331 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2104382871 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.456050442 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2891206499 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5438550217 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3184721364 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5483731849 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1664804364 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4585261833 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1299183594 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3752977557 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1618648119 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4104839109 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2266738862 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4315390742 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2602059805 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4987515978 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0907943093 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.348768221 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3277667824 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5267403611 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2123273366 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.408906638 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1880331404 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4234748209 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1517877566 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.423956163 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.148851004 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4514291775 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.3105472783 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.498684126 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2430984589 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4969060141 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1699224465 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3964402252 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2907230812 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5148223626 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3473636391 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5442574441 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3650597419 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5512750223 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.293824845 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.5724817779 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.1983726871 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4779908235 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0792877335 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.3908004248 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2863884915 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5641108436 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.352498756 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5860513143 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3577876868 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5943423055 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.5344280565 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.7084649844 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2914236052 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5147962724 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3761179017 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5590147212 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.452427177 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6525566656 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5467976399 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7780833183 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3554331718 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5997743406 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3847830842 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6191109047 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2579194729 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5351839762 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1571676635 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4046770996 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3771043132 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5835797455 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3564426025 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.6107274367 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2846092378 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5655970541 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5192984544 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7020040834 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0220051815 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1861453784 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0227307294 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1850492522 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2796934014 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5155626456 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1979202011 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4528880823 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1595296755 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.3859356797 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.171830216 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4608354018 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3520691191 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6035990708 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3987037224 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6195037668 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2709410734 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4976144005 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1150407607 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3709160058 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2903150375 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5392715859 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3460432788 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.6009670508 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3377417704 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5674360496 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3534620252 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6680177029 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1600009223 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.3857586031 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1602266912 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4091024664 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1546473042 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.3985794204 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1752645287 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4668449261 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3356485456 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5684527887 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3627134123 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.6050822949 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2481856237 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5180749152 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.242508046 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4046420215 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3019627022 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5133980923 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2316517545 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5189963647 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2187004813 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4910590831 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2108939118 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4375825873 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3274744668 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5605813039 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0800539722 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3336188156 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2692189197 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.5290912174 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2869741566 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5072256514 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2172591082 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.478962626 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2169046229 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3151387909 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0905061152 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.321707617 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0361920973 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.222315171 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3100950481 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.558054933 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2213152575 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4821662369 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.3126340837 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5601639768 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1875297747 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4394137195 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2502298144 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.5206889602 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1301910408 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.4488625613 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2809005667 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5466717628 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2287455417 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4915489263 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3395095603 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5877742809 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2222923122 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4572688692 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3794800258 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.6256125923 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3593747877 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.664135376 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.251920694 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4662583176 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.1647980206 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4166796691 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3022338928 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5587522289 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2252421952 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4768786292 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2793746981 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5246312011 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3496466203 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6032151622 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.119086784 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.3898511388 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0923649849 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4837931302 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0417850648 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2509675066 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0312813941 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.2886309955 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.2613495089 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5009335042 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1648455996 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.3943041737 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4164890636 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6375470445 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4166823661 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.575314128 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2913506513 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5431985912 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3723742743 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5891983505 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.340245547 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5681284927 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4702737577 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6903236014 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.272965046 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5527916308 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5195197328 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6892729705 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2862936285 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5264436928 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.4800957551 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6618495803 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1701995093 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.4262662427 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1513262342 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.4732082637 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.2365858071 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.4722212406 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2251623508 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4159341653 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.226689844 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4706510499 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2258552473 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4191499082 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0756830418 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3184767575 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0495523985 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.3971096934 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.1915993132 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4208812642 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1724511246 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4741419887 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.245439349 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.536270172 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.3929818488 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5787667028 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1784974236 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4520828188 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1435021957 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4868234587 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3258404036 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5652149653 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4264864443 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6543542662 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.268709657 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5306834056 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.3395981599 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6130756934 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2563448403 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5007966916 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2405135195 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5564984925 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.2787922254 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5420797212 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.259866454 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.48543634 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.2018690154 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4335923466 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.0972794658 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.327182503 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2400131449 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4851690277 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.383380628 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5430720239 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0736674948 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.2974206944 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0241026131 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.1246172628 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2757340333 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5244536559 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3185578758 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5765088485 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2918547905 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5631912653 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3283437369 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5565790802 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0359452883 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1779043042 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0305779168 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1838354035 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2595582459 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5043992681 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2142625601 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.429749938 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2595944841 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.5081810113 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2601189518 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5225655991 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2279880384 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.4835933272 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3478085621 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5968604742 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.065945115 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2358663461 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0540055322 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.2390749172 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.246042863 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4917114856 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1017188886 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1707828137 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.167004472 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.400944552 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.066271851 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.3937495329 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0472060067 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2924612708 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0525309984 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2304098638 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2124709579 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4811646042 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1392232 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2205120991 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1900086584 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4895930442 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2395565562 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3237759485 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1607803472 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.4377738064 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.1538390263 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.4751516021 }, { "model":"google\/gemma-3-27b-it", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"google\/translate-v2", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.3397504765 }, { "model":"google\/translate-v2", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.560449359 }, { "model":"google\/translate-v2", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.278826715 }, { "model":"google\/translate-v2", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4815690002 }, { "model":"google\/translate-v2", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.2795001892 }, { "model":"google\/translate-v2", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.5119662189 }, { "model":"google\/translate-v2", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.1719218154 }, { "model":"google\/translate-v2", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.4619906072 }, { "model":"google\/translate-v2", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.2955792162 }, { "model":"google\/translate-v2", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.5460142346 }, { "model":"google\/translate-v2", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2550661243 }, { "model":"google\/translate-v2", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.3516234079 }, { "model":"google\/translate-v2", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.3399225795 }, { "model":"google\/translate-v2", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5942330704 }, { "model":"google\/translate-v2", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2816905761 }, { "model":"google\/translate-v2", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.5676475667 }, { "model":"google\/translate-v2", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.3493985929 }, { "model":"google\/translate-v2", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5975748844 }, { "model":"google\/translate-v2", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3475330474 }, { "model":"google\/translate-v2", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5776317086 }, { "model":"google\/translate-v2", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.2092670256 }, { "model":"google\/translate-v2", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.466339127 }, { "model":"google\/translate-v2", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1728000073 }, { "model":"google\/translate-v2", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4285875773 }, { "model":"google\/translate-v2", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2747967998 }, { "model":"google\/translate-v2", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.5090740494 }, { "model":"google\/translate-v2", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2401570931 }, { "model":"google\/translate-v2", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4670149488 }, { "model":"google\/translate-v2", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2674941424 }, { "model":"google\/translate-v2", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.541891802 }, { "model":"google\/translate-v2", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1979823055 }, { "model":"google\/translate-v2", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.4521218857 }, { "model":"google\/translate-v2", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3768023433 }, { "model":"google\/translate-v2", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.6041064745 }, { "model":"google\/translate-v2", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2943485815 }, { "model":"google\/translate-v2", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.5100804178 }, { "model":"google\/translate-v2", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.2273493056 }, { "model":"google\/translate-v2", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4822061401 }, { "model":"google\/translate-v2", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1907459838 }, { "model":"google\/translate-v2", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4304499853 }, { "model":"google\/translate-v2", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.2698506992 }, { "model":"google\/translate-v2", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.5332909304 }, { "model":"google\/translate-v2", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.3805163094 }, { "model":"google\/translate-v2", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.5444910857 }, { "model":"google\/translate-v2", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.3207673833 }, { "model":"google\/translate-v2", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5578909014 }, { "model":"google\/translate-v2", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1931718671 }, { "model":"google\/translate-v2", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4102436779 }, { "model":"google\/translate-v2", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2940937001 }, { "model":"google\/translate-v2", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5656960013 }, { "model":"google\/translate-v2", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.4105743367 }, { "model":"google\/translate-v2", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.582274226 }, { "model":"google\/translate-v2", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.455106564 }, { "model":"google\/translate-v2", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.6691241367 }, { "model":"google\/translate-v2", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.5332526559 }, { "model":"google\/translate-v2", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.7086055004 }, { "model":"google\/translate-v2", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.2827542245 }, { "model":"google\/translate-v2", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.5361942504 }, { "model":"google\/translate-v2", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.3935667187 }, { "model":"google\/translate-v2", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.6110160857 }, { "model":"google\/translate-v2", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3504448262 }, { "model":"google\/translate-v2", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.6171883377 }, { "model":"google\/translate-v2", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.4493928736 }, { "model":"google\/translate-v2", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.6238587383 }, { "model":"google\/translate-v2", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.4085073951 }, { "model":"google\/translate-v2", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.6323019852 }, { "model":"google\/translate-v2", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.5247754427 }, { "model":"google\/translate-v2", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.7142099767 }, { "model":"google\/translate-v2", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3267673394 }, { "model":"google\/translate-v2", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5607576056 }, { "model":"google\/translate-v2", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3745254965 }, { "model":"google\/translate-v2", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5676283692 }, { "model":"google\/translate-v2", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.6256942034 }, { "model":"google\/translate-v2", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.7540191814 }, { "model":"google\/translate-v2", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.70888051 }, { "model":"google\/translate-v2", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8684926816 }, { "model":"google\/translate-v2", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.350374858 }, { "model":"google\/translate-v2", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5997407835 }, { "model":"google\/translate-v2", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3746387789 }, { "model":"google\/translate-v2", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6101421618 }, { "model":"google\/translate-v2", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3295944742 }, { "model":"google\/translate-v2", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5865092795 }, { "model":"google\/translate-v2", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2930219204 }, { "model":"google\/translate-v2", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.5297678901 }, { "model":"google\/translate-v2", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.4111750064 }, { "model":"google\/translate-v2", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.6314637291 }, { "model":"google\/translate-v2", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.4316058282 }, { "model":"google\/translate-v2", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.6598411557 }, { "model":"google\/translate-v2", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3779603397 }, { "model":"google\/translate-v2", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.6286963509 }, { "model":"google\/translate-v2", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5835846952 }, { "model":"google\/translate-v2", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7468900473 }, { "model":"google\/translate-v2", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.1585736619 }, { "model":"google\/translate-v2", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.3595908619 }, { "model":"google\/translate-v2", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0281783964 }, { "model":"google\/translate-v2", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2135990911 }, { "model":"google\/translate-v2", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.3338203117 }, { "model":"google\/translate-v2", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.550900416 }, { "model":"google\/translate-v2", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.2235904654 }, { "model":"google\/translate-v2", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4889537149 }, { "model":"google\/translate-v2", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.3014462049 }, { "model":"google\/translate-v2", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.5332346012 }, { "model":"google\/translate-v2", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2941569015 }, { "model":"google\/translate-v2", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5452786239 }, { "model":"google\/translate-v2", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.432237812 }, { "model":"google\/translate-v2", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6378291521 }, { "model":"google\/translate-v2", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.4001439439 }, { "model":"google\/translate-v2", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6257483281 }, { "model":"google\/translate-v2", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.3369838412 }, { "model":"google\/translate-v2", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.564308487 }, { "model":"google\/translate-v2", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.4344044669 }, { "model":"google\/translate-v2", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.6654473209 }, { "model":"google\/translate-v2", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3943233817 }, { "model":"google\/translate-v2", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.6168947522 }, { "model":"google\/translate-v2", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.5116660025 }, { "model":"google\/translate-v2", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.730698655 }, { "model":"google\/translate-v2", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.3708158915 }, { "model":"google\/translate-v2", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.5993132477 }, { "model":"google\/translate-v2", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2705214178 }, { "model":"google\/translate-v2", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.504548883 }, { "model":"google\/translate-v2", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.3900081426 }, { "model":"google\/translate-v2", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.6052122639 }, { "model":"google\/translate-v2", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.3336718595 }, { "model":"google\/translate-v2", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.5750387432 }, { "model":"google\/translate-v2", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3278554945 }, { "model":"google\/translate-v2", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5743022789 }, { "model":"google\/translate-v2", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3394020951 }, { "model":"google\/translate-v2", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5938537899 }, { "model":"google\/translate-v2", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.3409989486 }, { "model":"google\/translate-v2", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5959288844 }, { "model":"google\/translate-v2", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3711980077 }, { "model":"google\/translate-v2", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4848412412 }, { "model":"google\/translate-v2", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3673114251 }, { "model":"google\/translate-v2", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.6183652016 }, { "model":"google\/translate-v2", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.3709969529 }, { "model":"google\/translate-v2", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.598464243 }, { "model":"google\/translate-v2", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.3570145905 }, { "model":"google\/translate-v2", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.5887718416 }, { "model":"google\/translate-v2", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.3713033391 }, { "model":"google\/translate-v2", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.604716209 }, { "model":"google\/translate-v2", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3851664104 }, { "model":"google\/translate-v2", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.6312237305 }, { "model":"google\/translate-v2", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1874455996 }, { "model":"google\/translate-v2", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.4426393743 }, { "model":"google\/translate-v2", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.3113098415 }, { "model":"google\/translate-v2", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.5501022834 }, { "model":"google\/translate-v2", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.3347737931 }, { "model":"google\/translate-v2", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5576944014 }, { "model":"google\/translate-v2", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2822808126 }, { "model":"google\/translate-v2", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.5526101149 }, { "model":"google\/translate-v2", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2612977966 }, { "model":"google\/translate-v2", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3457225363 }, { "model":"google\/translate-v2", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.3348942842 }, { "model":"google\/translate-v2", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5861344551 }, { "model":"google\/translate-v2", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1311732143 }, { "model":"google\/translate-v2", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4350789061 }, { "model":"google\/translate-v2", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2903894802 }, { "model":"google\/translate-v2", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.5623472971 }, { "model":"google\/translate-v2", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.2190660395 }, { "model":"google\/translate-v2", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5006362228 }, { "model":"google\/translate-v2", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.339831623 }, { "model":"google\/translate-v2", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.590846484 }, { "model":"google\/translate-v2", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.3016318322 }, { "model":"google\/translate-v2", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5461894184 }, { "model":"google\/translate-v2", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3491068707 }, { "model":"google\/translate-v2", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5803894973 }, { "model":"google\/translate-v2", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.3274616019 }, { "model":"google\/translate-v2", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.5109521029 }, { "model":"google\/translate-v2", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3962757824 }, { "model":"google\/translate-v2", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.6224286451 }, { "model":"google\/translate-v2", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.5032472209 }, { "model":"google\/translate-v2", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.7257127115 }, { "model":"google\/translate-v2", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.3361081405 }, { "model":"google\/translate-v2", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.5602875655 }, { "model":"google\/translate-v2", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.282320421 }, { "model":"google\/translate-v2", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4830195157 }, { "model":"google\/translate-v2", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.339447252 }, { "model":"google\/translate-v2", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5865985454 }, { "model":"google\/translate-v2", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.3120075365 }, { "model":"google\/translate-v2", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.5354876043 }, { "model":"google\/translate-v2", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.3500838996 }, { "model":"google\/translate-v2", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5820135911 }, { "model":"google\/translate-v2", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3833463355 }, { "model":"google\/translate-v2", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.630764328 }, { "model":"google\/translate-v2", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.2752866209 }, { "model":"google\/translate-v2", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.5470670325 }, { "model":"google\/translate-v2", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.2362408388 }, { "model":"google\/translate-v2", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.5649412405 }, { "model":"google\/translate-v2", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.2628008901 }, { "model":"google\/translate-v2", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.5081811686 }, { "model":"google\/translate-v2", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0993493347 }, { "model":"google\/translate-v2", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.456860433 }, { "model":"google\/translate-v2", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.3258874325 }, { "model":"google\/translate-v2", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5886625327 }, { "model":"google\/translate-v2", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.321631251 }, { "model":"google\/translate-v2", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.5362369434 }, { "model":"google\/translate-v2", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4482674529 }, { "model":"google\/translate-v2", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.671945393 }, { "model":"google\/translate-v2", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.5160129517 }, { "model":"google\/translate-v2", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.6445374779 }, { "model":"google\/translate-v2", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.3157581247 }, { "model":"google\/translate-v2", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5682347228 }, { "model":"google\/translate-v2", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3771434243 }, { "model":"google\/translate-v2", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.6045220423 }, { "model":"google\/translate-v2", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3489983932 }, { "model":"google\/translate-v2", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5800455435 }, { "model":"google\/translate-v2", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.5650298473 }, { "model":"google\/translate-v2", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.7365285421 }, { "model":"google\/translate-v2", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.3390274579 }, { "model":"google\/translate-v2", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.592940935 }, { "model":"google\/translate-v2", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5470657372 }, { "model":"google\/translate-v2", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.7020072444 }, { "model":"google\/translate-v2", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.3013142128 }, { "model":"google\/translate-v2", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5656623498 }, { "model":"google\/translate-v2", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.5178438056 }, { "model":"google\/translate-v2", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6867971436 }, { "model":"google\/translate-v2", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.3354195212 }, { "model":"google\/translate-v2", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.5741211618 }, { "model":"google\/translate-v2", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.3462677897 }, { "model":"google\/translate-v2", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5833767681 }, { "model":"google\/translate-v2", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3412028977 }, { "model":"google\/translate-v2", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5889369863 }, { "model":"google\/translate-v2", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.3852572206 }, { "model":"google\/translate-v2", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.5784169857 }, { "model":"google\/translate-v2", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2698751119 }, { "model":"google\/translate-v2", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.5340401081 }, { "model":"google\/translate-v2", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.4096366215 }, { "model":"google\/translate-v2", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.5525266748 }, { "model":"google\/translate-v2", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.256568307 }, { "model":"google\/translate-v2", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.4900607089 }, { "model":"google\/translate-v2", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1305127177 }, { "model":"google\/translate-v2", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4214140091 }, { "model":"google\/translate-v2", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.3156233999 }, { "model":"google\/translate-v2", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.5490670273 }, { "model":"google\/translate-v2", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1872166048 }, { "model":"google\/translate-v2", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4920219369 }, { "model":"google\/translate-v2", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.321113344 }, { "model":"google\/translate-v2", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5864222708 }, { "model":"google\/translate-v2", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4884555721 }, { "model":"google\/translate-v2", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.6556325596 }, { "model":"google\/translate-v2", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.352233215 }, { "model":"google\/translate-v2", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.5470765309 }, { "model":"google\/translate-v2", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1904552367 }, { "model":"google\/translate-v2", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4562964405 }, { "model":"google\/translate-v2", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3843214006 }, { "model":"google\/translate-v2", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.6136910044 }, { "model":"google\/translate-v2", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.4689482853 }, { "model":"google\/translate-v2", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6877930778 }, { "model":"google\/translate-v2", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.4161610215 }, { "model":"google\/translate-v2", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.6455274177 }, { "model":"google\/translate-v2", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.5117468349 }, { "model":"google\/translate-v2", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.7232937985 }, { "model":"google\/translate-v2", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.3318364746 }, { "model":"google\/translate-v2", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5738929543 }, { "model":"google\/translate-v2", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.3895968702 }, { "model":"google\/translate-v2", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.6277778554 }, { "model":"google\/translate-v2", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.4077995927 }, { "model":"google\/translate-v2", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.6409257804 }, { "model":"google\/translate-v2", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.485273374 }, { "model":"google\/translate-v2", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.6710025354 }, { "model":"google\/translate-v2", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.3269754516 }, { "model":"google\/translate-v2", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5639027355 }, { "model":"google\/translate-v2", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.465639801 }, { "model":"google\/translate-v2", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.597395155 }, { "model":"google\/translate-v2", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.2639320429 }, { "model":"google\/translate-v2", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.5034191891 }, { "model":"google\/translate-v2", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.2139020366 }, { "model":"google\/translate-v2", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.3242506245 }, { "model":"google\/translate-v2", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.3693666266 }, { "model":"google\/translate-v2", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.6134245868 }, { "model":"google\/translate-v2", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3936947375 }, { "model":"google\/translate-v2", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.6497039072 }, { "model":"google\/translate-v2", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2907704167 }, { "model":"google\/translate-v2", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5698553329 }, { "model":"google\/translate-v2", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.4401277302 }, { "model":"google\/translate-v2", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.6278421339 }, { "model":"google\/translate-v2", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.3080488172 }, { "model":"google\/translate-v2", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5695112482 }, { "model":"google\/translate-v2", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.3188563568 }, { "model":"google\/translate-v2", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.5116789278 }, { "model":"google\/translate-v2", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.273125871 }, { "model":"google\/translate-v2", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.5508470442 }, { "model":"google\/translate-v2", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.3030324343 }, { "model":"google\/translate-v2", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5648891805 }, { "model":"google\/translate-v2", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.3603047797 }, { "model":"google\/translate-v2", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5927521365 }, { "model":"google\/translate-v2", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.4395780689 }, { "model":"google\/translate-v2", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6552870615 }, { "model":"google\/translate-v2", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"google\/translate-v2", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.3662176152 }, { "model":"google\/translate-v2", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.5856640284 }, { "model":"google\/translate-v2", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1500486487 }, { "model":"google\/translate-v2", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.2162606152 }, { "model":"google\/translate-v2", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.3163153725 }, { "model":"google\/translate-v2", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.5712728237 }, { "model":"google\/translate-v2", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.1386875315 }, { "model":"google\/translate-v2", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.469849511 }, { "model":"google\/translate-v2", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.1925403782 }, { "model":"google\/translate-v2", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.4228528325 }, { "model":"google\/translate-v2", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0437670613 }, { "model":"google\/translate-v2", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2053727616 }, { "model":"google\/translate-v2", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2311956685 }, { "model":"google\/translate-v2", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.5099861434 }, { "model":"google\/translate-v2", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.281120015 }, { "model":"google\/translate-v2", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.3263629293 }, { "model":"google\/translate-v2", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.3441147842 }, { "model":"google\/translate-v2", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.6121137924 }, { "model":"google\/translate-v2", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.4192686299 }, { "model":"google\/translate-v2", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.4558586669 }, { "model":"google\/translate-v2", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.353693059 }, { "model":"google\/translate-v2", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.607730412 }, { "model":"google\/translate-v2", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.3303018306 }, { "model":"google\/translate-v2", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5960312224 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.0750313913 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.2689370364 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1097950919 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.2459305972 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.04291871 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.1992226055 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0012692029 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.1055962738 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0135029462 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.1510010912 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0033288372 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0389705109 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.0592251547 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.2921276604 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.0366276845 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.2393327958 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.0837265107 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.3012065838 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.1120670716 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.2771296913 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0213908698 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.2206299292 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.0285726559 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.2326683564 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.0290259599 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.2238098591 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.0597935462 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.2388670431 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.0254218054 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.1827114877 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0012328171 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.1200208328 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.0888105743 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.2644372522 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.0668939667 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.2520473985 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0148972561 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.2336350172 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.0047574121 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.1922357185 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.0366814427 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.2731193887 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.0173677773 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.2136838993 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.0581882104 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.2702416532 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.0562052656 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.2181774858 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.021858254 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.2327499821 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.0253088472 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.1918662187 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.1238388635 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.3442960257 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.0243559813 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.2777667131 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.0155834504 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.1833348617 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.1279598659 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.1272159331 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.3713000806 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.1249879163 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.332725923 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.1784955678 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.4008799371 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.1782663616 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.4070825897 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.0708485888 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.2987254392 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.0335582401 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.2213744022 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.2797104835 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.4601471921 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.3392404298 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.5518674496 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.1682956348 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.3705606944 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.1595479626 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.4040956812 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.0333586544 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.2582740293 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.0187498765 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.2265924477 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.1122756663 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.3654994366 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.0565617503 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.2914640343 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.1362099506 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.4039646029 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.2151785904 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.4309035319 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0308954874 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1818653 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0168607588 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1570666495 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.0426900866 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.2179546047 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.0232938459 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.2007105106 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0254280801 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.1890793851 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0144953008 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.1884647114 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.1658307051 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.3676480008 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.1711676323 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.360371738 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.0316097931 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.2354084259 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0324437189 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.2310907497 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.1200885566 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.3203716958 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.1112414449 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.3175953836 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.1399603895 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.3604113675 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.1312450274 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.3988827234 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0365286922 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.1990661561 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0199391634 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.1497878674 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.0448341108 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.252793155 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.0162978516 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.2157203276 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.0992729275 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.3331935567 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.1507170285 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.4062498972 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1222242366 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.3415186168 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.1607198845 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.2706371796 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.0550016522 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.2468917982 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.017690474 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.2536001746 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.0436064509 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.2149063632 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.0078784523 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.1640159488 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0075610365 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.1887566902 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.0847212314 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.0397152583 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.2037873099 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.0137546968 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.1710566423 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.0564413481 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.3212656259 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.0674768387 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.1657544717 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.037013395 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2338208281 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0011308834 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1078312824 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.068704384 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.2935036324 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.0888104824 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.2740386167 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.039812774 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.2668100649 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0365460997 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.2177619304 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0221745742 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.1989640913 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.016161822 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.1752502983 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.0532822129 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.2301007371 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.022639121 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.175884413 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.0411296617 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.2442292695 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.0482775033 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.2211003771 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.1375979502 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.3518293272 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.1462646527 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.3886405702 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.2238699363 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.1606469353 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.0498052059 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.2651327526 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.0286452462 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.2023398596 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.122262497 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.3515428019 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.1527676878 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.4249238432 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0279631361 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2321541854 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0195222834 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1762015362 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0127402107 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1651498064 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0088808864 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.1702476721 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.0366762006 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.1697470704 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0190635247 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.1343627089 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.2292007848 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.3716655897 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.2126702079 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.3248815955 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1085949014 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.3346783911 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.1676106101 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.3907504991 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.1013445398 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.3063125264 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.2231857524 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.4401829864 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.0955350175 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.3286351702 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.1871630014 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.3655570607 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.1005466956 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.3356518748 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.1363185356 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.3769111636 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0189997083 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.1919557381 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0165320564 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.1417103032 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.0441421075 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.1739314177 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0182129294 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.119134604 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0302406554 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.1787247799 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0014746217 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.0931903615 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0192199252 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.1851933727 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.016306816 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.1413916659 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0299103049 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.1986832691 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0138432 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.1610036541 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.1315135307 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.3968249514 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.1220351802 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.3505317727 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.0605189037 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.2644052383 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0323301168 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.2153999563 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.1638179638 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.3812064776 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.2333909009 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.4689649165 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.0510060878 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.2132741272 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.0089146903 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.2314109768 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.0303451125 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.1748071119 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.0076177075 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.1986647775 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.1096331511 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.2888090685 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.1206114883 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.2597106436 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.0268228091 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.1751009974 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.0015866917 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.1423262509 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.0382115226 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.2562543067 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.0125933293 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.1659603426 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0161407336 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.1568869137 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0241402636 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.0907273605 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.2666411269 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.0495223383 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.276468397 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.1408426214 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.3768513401 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.1029189854 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.3606378352 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.023064469 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.0922502173 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1015661134 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.0891185343 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.281793335 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.0799576366 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.2661903898 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.0287030985 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.2124751899 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.0040250398 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.1706379305 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.1389065496 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.3708687542 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.1661316612 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.354399593 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0242583204 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.1929988599 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.012814538 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1551759179 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.0973024735 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.3076531166 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0161682999 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.0635837055 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0478834907 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.208762819 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0095341532 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.150635966 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0080365175 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.1647602539 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.0909640555 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1043191943 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.3414129274 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.0651436117 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.1002582276 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1133138775 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.3754190494 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.1672636279 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.232325354 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0206894768 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.1952546166 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0113901513 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.1302687289 }, { "model":"gryphe\/mythomax-l2-13b", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2152631134 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4621033585 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1438913245 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.3637806215 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0488692805 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.2406135335 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0214880279 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2270659336 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1203676158 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.3241911739 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0165994228 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.1058904177 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2332719546 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4924788322 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.1544598614 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.403814105 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2609114367 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5167379854 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.1932802581 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.4648835751 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1270864308 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.3882289796 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1366193757 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3691581345 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1966421011 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4161867731 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1531147508 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.3635575685 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.152657571 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4132859119 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0663682991 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3007728685 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2739426076 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5129388019 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1227671497 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3306179967 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.130005692 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.3815764307 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.136654027 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3809883299 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.092815209 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4056492611 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2013843536 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.417738842 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2451140745 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4883780153 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1334730215 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.363000921 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.1938367121 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.4674774016 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.2073541352 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.4438396219 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.260902514 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.4589913242 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.2033313823 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.4899690932 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.1866723671 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.3936568086 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0494661624 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.320713519 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2435247423 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.501836375 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.1956638929 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.4870538255 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.2685056004 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.506818165 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.3801657831 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6056477234 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2197756902 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4571715629 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.2911113336 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.4641387139 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.2216031518 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.4429472312 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.3322887566 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.5824514758 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.1640783778 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.434749516 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3126220052 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6038883227 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2147534918 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.4874495537 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.094833194 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3292486732 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3074477197 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5168064726 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2380541489 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.552589393 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2261588318 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.4944996319 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4626001556 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6614963779 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0704357087 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.214215478 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0113955269 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1874434226 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2250377214 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.4688956519 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1515937263 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4251249067 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0957125553 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.3135046613 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.1387309388 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.383923158 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3316295853 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5594083443 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3598049012 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5828568956 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1166136282 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.3855078109 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0887957809 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3187695245 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2237665442 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4896395702 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2614626337 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5280652466 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.2236143729 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.4912948296 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.2910526755 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6005590773 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0845382562 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.3303135434 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1267144204 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.3667670284 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1301757317 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.3568142061 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1161068297 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4147684511 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2150043089 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4693148389 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2998342329 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5545377546 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2131020144 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4845704057 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.1446650781 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.2292145443 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1840709267 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.4042090141 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.1902389614 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.4796942089 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1500855 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4100527329 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.1136442629 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4236240472 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.1899800627 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.4618900518 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.087831891 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.265907742 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1797031918 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.446466319 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1846339038 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4220658756 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1932392069 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4204852284 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.0476115004 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.0790735292 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0505101039 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2502174391 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0135029462 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1682326163 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2778994313 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.534066621 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.1661930328 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4001562798 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.212877318 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.4587224182 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0873487304 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.3409755146 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.1013985932 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.3022487832 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0611289601 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.3842495071 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2670990652 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5220692033 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1981851908 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4555314776 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2152417217 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.4578207034 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.1654073391 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.3941079443 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.2763114217 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5353874356 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3315044625 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6241092077 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2083780287 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4466015977 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0879797246 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.3441521948 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.2353570133 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.4846450712 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1632876087 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4158370821 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2136151785 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4670269701 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2202915792 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5233013945 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0757269477 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2848998148 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0329429353 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.193397393 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0206783974 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2005424268 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0171071488 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.239253642 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1677012885 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.4316686173 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1202133569 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.3816566526 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.1891048622 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.4238091524 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.2086330089 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.364326938 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1998083747 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.4667760664 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3336132898 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5541703282 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2256776552 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.4793769886 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3320822339 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6132478102 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.1978207058 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.4682392821 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.409911871 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6078072484 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.1730354472 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.454951133 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3073459183 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.554890569 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0841096684 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.2671346741 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0409000825 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.3185422263 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.1343511225 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.385804118 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0662090824 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.3030628402 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.1430431721 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.3858131555 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.1602151366 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.357355981 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0399118136 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.2285191544 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0269601697 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.3006594703 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0705893599 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.26834967 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0988956652 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.3896116232 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2014976842 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.470196154 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.2922893702 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.504298223 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1538893384 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4005055487 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.123816759 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4480197519 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2739803344 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5286596224 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3468145126 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6078627072 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.1798653655 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.4290347271 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.1709237193 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.522080463 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.1405088565 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4088204523 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2360333676 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5038093602 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.2507091988 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5067409185 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.2998213366 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5112516267 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1029119511 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.37770665 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.1507872741 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.3945276116 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2022308722 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4620737135 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2592698778 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4253112122 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.052152068 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.2578037483 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0237309602 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0808540368 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2231619401 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.4653230255 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.2701457179 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5043061571 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2437336637 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5042825637 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3231504544 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5440009645 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0461126383 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1577698173 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0008007826 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1426831674 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.194871957 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4838690709 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.1908512613 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.3855814375 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.177513842 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.386230097 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2544813414 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4948262917 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2003733128 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.4613442635 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3326058501 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5496097026 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0685259305 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.234718691 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0319335459 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1970529604 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.1538409962 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4398974306 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0373503953 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.074080246 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0707946688 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.2881582575 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0204932467 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.2690069759 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0339322053 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2166922882 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0169319822 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.1617793532 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.174955946 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4554476081 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1400846872 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.1809201358 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1869648376 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4664675781 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.1696786467 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.2325846099 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0515154857 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.2819357103 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0804926199 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.3473550746 }, { "model":"meta-llama\/llama-3-70b-instruct", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2561356588 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.49648558 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2071032166 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4109686249 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1091965593 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.3173514683 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0490503072 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2649288206 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1375609672 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.3710873948 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0526682247 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.1185738392 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2624553878 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5118695802 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2175864677 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4641969296 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2766513185 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5528193738 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3371708551 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5607841978 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1412140088 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4082636767 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1613157357 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3896124669 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2374604323 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4585263555 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2300327193 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.43482663 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.220118305 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4767360664 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0623766799 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.2986098722 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3261928856 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5339015736 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2330787096 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4427484336 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1767618659 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4088166263 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1381246624 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3730280956 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.161760748 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.450732576 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2170520787 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4098923096 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2731033294 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5159041397 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2075901182 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4086862509 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2623045124 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.512895511 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3328087961 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5013967236 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.36364203 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5712362729 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.2701190878 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.551918321 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.1903904403 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4687496067 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0520773173 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.3368230674 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3416129059 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5845038999 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2726323508 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5283879512 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3059254014 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5403551155 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4588037752 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6692431614 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.268843518 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.493449014 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3403168702 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5086292148 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5486108614 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6873139374 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.691450825 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8407064328 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3021494986 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5604145602 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3865320679 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6126903448 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3383579693 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5641679075 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2069613925 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4288746449 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3771163962 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5827777548 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3225619014 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5830090459 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2924853239 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5462367408 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4963618411 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.682573515 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0727409119 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1973579541 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0147511412 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1172295571 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.3220032872 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.537784261 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.2206953431 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4990640113 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1527043255 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4074071592 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.1567942198 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4125213011 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3914590212 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6128431348 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.4189427376 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6262550579 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2338808528 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4544102907 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1243598882 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.4051657211 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2678903597 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.520149627 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3361911519 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5583527487 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.290274553 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5143960108 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3789023659 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6751523776 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1640789976 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4462743519 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1964520184 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.3861299089 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1899251487 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.428775702 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1861284915 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4568269097 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.296179579 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5259369403 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3187240753 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5786749514 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.3306727326 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5648258387 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.262512317 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4036795798 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2593989014 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.4825732152 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2177203514 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.452576603 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2013677498 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4740327886 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2015887265 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4935901226 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3098535214 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5740290935 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0913702814 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3128837987 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2155836452 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4583181839 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2468017951 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4734415865 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2276258723 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4577745447 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.3046437152 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3825631739 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.1034813211 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2900727397 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0357350273 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2486408005 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3838720489 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5776118345 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.200683389 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4892134584 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2846718719 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5239506053 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.135802286 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.3994207414 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.1470288737 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.370597998 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0624257747 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.3667603362 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.271926141 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5318057054 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2579782083 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4806502272 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2987546297 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.526361525 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.1515083487 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4332812085 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3458633411 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5703964991 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.4050410338 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6535211779 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2598947984 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4881220333 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2015903544 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4513077936 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.260901002 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.4896606547 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1429173328 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4071639857 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2472539314 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4972878378 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2865823477 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5850706516 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0888165228 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2927704081 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0527161443 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.3183888298 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.024472012 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2143061298 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0100859589 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.2528047704 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.285587224 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5216659729 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1281773816 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.3720376754 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4479091606 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6272058507 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4155919737 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5685427433 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2568309796 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5058927884 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3323405641 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5569354008 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3183331223 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5559144449 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4500659682 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6761551234 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2803966495 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5308215606 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5346576918 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.7024180686 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2051262499 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4837372958 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3699194641 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5770913921 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1806014296 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.4291519278 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0966721561 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.3133383199 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.2666171334 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.4765001737 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.1168753501 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.3522716786 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2270309753 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4841536531 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.1352090178 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.3326584955 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.067782587 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.2594122638 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0496762437 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.2555516699 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.1103212906 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.3317936338 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1178727843 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.3706477532 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2704960778 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5452851397 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.395555296 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5799018584 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.253350788 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.458172945 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1663559354 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4946292339 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3079953173 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5447429639 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3935950974 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6342780862 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2983575506 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5283269577 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.3087303367 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6028401745 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2210855899 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4616487061 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2867265678 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5646798034 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.357367421 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5659157865 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3515649071 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5645706042 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1896909211 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4144350541 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.202234159 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4284203038 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2422208114 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4745802383 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3118363896 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4892437324 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0689409767 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.2722531305 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0406781073 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0970886698 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2772171859 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5118206984 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3271066365 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5803660329 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2451818788 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4929183421 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.2948409512 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5143250682 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0677223644 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1661136189 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0419433658 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1567633534 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2584265792 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5020697076 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2624388601 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4452300688 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2640324297 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.5196545965 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2748329219 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5341998684 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2872551102 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5360250569 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3575088107 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5879974234 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0892196115 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.266303191 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0392376693 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1442620012 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2174626032 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4686565248 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0941401506 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1690248565 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0921572696 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.3072945662 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.052810575 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.2387421258 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0706373037 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2741240176 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0463945559 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.1664955069 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2098595469 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4603190119 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.0831701767 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2065328404 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2619931658 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5251502482 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2725475868 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3148065512 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1611133093 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.379825756 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0702261565 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.3706020457 }, { "model":"meta-llama\/llama-3.1-70b-instruct", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2279903683 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4722573355 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1435174722 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.3991235315 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1058038471 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.3048469769 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0491812173 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2808449794 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1452228976 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.3699202818 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0630164833 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.1755172285 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2933386948 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5447211689 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2166524228 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4496497227 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2997360932 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.552448295 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.2837522278 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5150814494 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1559168311 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4268663808 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1391913129 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3773300026 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1936957127 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4274660929 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2338775014 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4546098648 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2240336457 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.456891102 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0249457171 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.237299794 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3606359676 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5477908661 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1847580734 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3911269476 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1576463626 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.3910005157 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1690495289 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3930640761 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1487255467 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4407404732 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2175383868 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4344749015 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.3061950313 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5535821276 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1811004213 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3534531968 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2646772038 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5157826791 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.331255344 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5093924414 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3806413844 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5722776653 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.2863154138 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.5492472281 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.181535472 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.4498383877 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0712120544 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.3404533027 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2928040954 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5416342014 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.3623393932 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5863625454 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3129672706 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5456543979 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4852409005 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6908376394 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2581336709 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4704308834 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3342775397 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5026657233 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5510215557 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6909834226 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6679215449 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.823262947 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3139331841 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5454623234 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3561290923 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.599796306 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3069040556 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5327832177 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2185015953 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4264089038 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3745780882 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5785175063 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2904415478 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5908280404 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2963449909 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5544997379 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5091700689 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.690320784 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0862885919 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2372420697 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0261732885 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.168027641 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.295613677 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5221513183 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1697878702 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4619303787 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1704488365 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4403441536 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.1466885285 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4179596519 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.385731086 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6034244629 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.357812342 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5894721809 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2543010782 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4974062413 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1038893205 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3250555425 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.276319154 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5531277158 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3130631115 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.576837791 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3143630083 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5621460006 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3372718385 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6543894215 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1641186812 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4109013799 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1375093856 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.3810358014 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.2236376263 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4348813399 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1135128656 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4345857133 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2934122255 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5393871714 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3100077394 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5714730187 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.284322765 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5304019177 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2212745751 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.355950114 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2779520489 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5022141687 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2476437073 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.4805413308 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1839138217 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4860524069 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.170404008 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4549282359 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.2759135194 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5427685716 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1237988917 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3265823778 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2379701997 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4807978998 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2068140088 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4615143451 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2508944927 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4998320266 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.210142355 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3127505848 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0890010757 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2623679578 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0406768013 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2265562343 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3934799806 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.6000359011 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2497480714 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5149579975 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2776870629 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5361437897 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.101231398 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.3867790942 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.1710070826 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.3961869922 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0586978059 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.3322172345 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2566290969 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5196341734 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2701878605 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4964908212 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3224419544 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5606155155 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.1663847917 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4383662593 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3103894957 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5854645421 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3913336262 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6467989318 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2392486974 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4920626101 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2130836675 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4622075168 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.2893481535 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5215715176 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1419402772 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4014256358 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.249611031 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4991029967 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2942599953 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5860228525 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0973366086 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.3232614896 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0552969578 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.3314346183 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0356994946 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2438060785 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0183592041 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3076327609 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.2403245803 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5023246313 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1415388613 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4055113288 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4519562833 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6328740374 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3918546765 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.551810658 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2474554181 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.490135462 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3387973296 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5550868321 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3065957195 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5567333989 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4450524918 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6712742861 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2717880574 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5367497902 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4990357373 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6703309998 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2184907643 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4824691404 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3406091079 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5689518318 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1780482269 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.4251975218 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1238628432 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.3651594596 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.2363136631 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.4724628618 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.1012580658 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.3226539734 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.1702954814 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4314499751 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.1165218233 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.3219658957 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0415760658 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.2726763268 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.059393934 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.3265574234 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0974935448 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.337395124 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0786475166 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.3616383914 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2468393111 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.497224405 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4165200238 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.60128551 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2331294709 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4605502791 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1664653883 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4762879225 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3181506443 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5441377883 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.384958542 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6364660715 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2373393477 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5017230165 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.2334583695 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.5603415221 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2147204762 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4649686586 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2451735521 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5352886898 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3767393472 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5870286691 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3415510311 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5592933672 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1801710665 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4238537274 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.1125461134 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.3456654305 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2385233061 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4987667959 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3202315883 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4748886274 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0653357736 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.3152990905 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0468367135 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.1314286197 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2786347493 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5143010521 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3361615644 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5732166456 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2384384134 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4989290832 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.4068700974 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5784786574 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0295327628 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.168725075 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.047075079 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.2446929278 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.252958993 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5053979802 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2339821201 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.425306622 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.236632122 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4793696196 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.265421135 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5116987882 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2721498467 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5200799335 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3613736416 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5866656133 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0887261142 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2752257416 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0469317169 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.2008453897 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.1844305556 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4417239043 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1248600823 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1650750126 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0773908628 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.2972337309 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0211167911 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.2699477659 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0578164805 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2633608218 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0591720568 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.1753356197 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2018552397 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4546838419 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1477972133 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.238559837 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2259180607 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5258178103 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2474954475 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.2841722148 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1511814979 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.3667501588 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0635700737 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.3690469819 }, { "model":"meta-llama\/llama-3.3-70b-instruct", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.3304277157 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.5443905094 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1851657228 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4035662808 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.109249521 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.3018012797 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0601412463 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.3378209538 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.278050095 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.5030892659 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.2056413876 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.319475702 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.3412628716 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.6168944847 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2079777284 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4737326062 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.3288891576 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.560493221 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3370289388 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5616376735 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1877842343 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4430965745 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1822974398 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3877585115 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2820784803 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.5005600008 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1762767818 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.3980584269 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2678950965 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.5139106802 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1062771627 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3373123707 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3378377362 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5424277928 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2039436913 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.413295677 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.2078798411 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4443945632 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1864199422 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4220840798 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1482839317 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4568926673 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2815774482 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4716025494 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2239129937 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4981762083 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1952639614 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.40510597 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.3130233588 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5569163893 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3322667951 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5140240989 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3401088117 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5684561927 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.4507021781 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6542740054 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.2563123252 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.517826657 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.2261228199 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.4918700987 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3778406936 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.6293186521 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.4207498261 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.6224036774 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.386239845 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.6110300223 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4508475568 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6653203029 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.3208174129 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5441131834 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3167289307 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5086510118 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5649266234 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.7240609445 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6160790992 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8133281991 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3152773331 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5548531112 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3335962816 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5859881472 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2974755741 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.548510072 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.198877694 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4016676481 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3394346908 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5772205685 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2961737536 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5732830973 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.3174175523 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5682518332 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5173973527 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.7056428374 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0282301718 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2509358266 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0302470726 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2143808411 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.336179684 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5448761462 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1255076156 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4301109075 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.2176466652 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4472887488 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2048605344 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4547067722 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3796955055 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6056742688 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.382370623 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5922578575 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2583771315 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4968818998 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.116404849 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.355624191 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2893259192 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5696888872 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3664678104 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.6152145331 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.2523550022 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5458277736 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3774331947 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6486159416 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.2004372781 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4254008414 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.2594342647 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4939201844 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1883645682 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4379611856 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.166061461 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4725232576 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2923994901 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5447705341 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.4037821428 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.6296260979 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.3130983776 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5536124921 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.3266051607 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4444671407 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3146151088 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5434123174 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.3068652176 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5696595268 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2107380254 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4991705013 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.331371608 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5825812793 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.3610313078 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.605032383 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1827077293 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.4206815495 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.273765965 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.5541955864 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2689874625 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5150630417 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2599758467 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4915631618 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.1778560135 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2969773205 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0622795727 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2675764955 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0381251381 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2812186233 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2963258613 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5478698134 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2490501232 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.470734921 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2517024761 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5071223357 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1690507631 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4610312304 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.245769233 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.4441647844 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1869691691 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.4993277276 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3697837131 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.598263628 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2866761532 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.5500074549 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3125095049 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5535296132 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2275791183 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4569915545 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.345465339 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5897544047 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.4313143535 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.672362003 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.3075903861 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.578349632 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.1897910105 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4717519215 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3557303786 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5879008408 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1851047496 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4783025401 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2465270522 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5037852263 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3149743955 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5635296931 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.1800815958 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.4077560746 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0667158921 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.3678415876 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0536067183 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2934406231 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0212136796 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3367052821 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.2884546263 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.5474826718 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.2053276951 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4215800492 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4246522462 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6364852561 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.439726575 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5806732389 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2708043929 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5320865131 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2827359953 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5407300006 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3451568022 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5845365036 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4582156105 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6491023878 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.3130092522 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5728467895 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.503492173 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6731092747 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2567420946 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5061211552 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.4025875747 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.6083538055 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1596218382 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.3642420922 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.1505395069 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.4131000516 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3060556532 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5236224481 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2303677631 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4550652237 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2505378464 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4777570712 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.238193304 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4255249112 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.1176743345 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3404474685 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1289978109 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.426186094 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2374825749 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4257984603 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1278040594 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4360467159 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2878836927 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5437304451 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.4038152012 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5978824564 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2373274152 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4608187705 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1899355362 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4693159323 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.298223826 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5684457257 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3825937295 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6393626909 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.3425500041 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5680420054 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.2980763573 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.5924685945 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2473591284 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5382867852 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.3068229029 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5637367471 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3462186566 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.6055817314 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3822149946 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5783618359 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.2141866571 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4746491206 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.2383903304 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4750617701 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2652851581 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5278626321 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3718263092 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5255136074 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.1999962108 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.4183108341 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.1133461632 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.2113810541 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2978237586 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5152360665 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3447394658 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5850690403 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2589937034 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5348601679 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3544986277 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5913345073 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0650028377 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1912574022 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0525305732 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.2699878572 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.253549974 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5235064606 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2484556869 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.429442787 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2701010494 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.5241051692 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.3181656056 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5907906511 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2670924013 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5198891912 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3597766713 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6081806669 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0741464388 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2660826012 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0695734356 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.2997632689 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2653942694 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4893923691 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1157826458 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1765005496 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1376742076 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.3734228567 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0864151864 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4104899998 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0937886749 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.3196766983 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.1093519063 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.3224056963 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.2204568545 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4894165826 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1569463992 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.232785021 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2703289724 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5473102513 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.317769874 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3676906362 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.2474518428 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.5019415605 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.19092807 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.4983188666 }, { "model":"meta-llama\/llama-4-maverick", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.1327783313 }, { "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.3636961218 }, { "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1557273583 }, { "model":"microsoft\/phi-4", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.3515058711 }, { "model":"microsoft\/phi-4", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0375398146 }, { "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.1986406573 }, { "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0015655622 }, { "model":"microsoft\/phi-4", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.0623964125 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0476085337 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.205897506 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0110136998 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0688367427 }, { "model":"microsoft\/phi-4", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.0927070911 }, { "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.3300356171 }, { "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.1909661669 }, { "model":"microsoft\/phi-4", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4391780261 }, { "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.1548779531 }, { "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.3887963415 }, { "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.268706305 }, { "model":"microsoft\/phi-4", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.4959259833 }, { "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0374286633 }, { "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.276784029 }, { "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1363017113 }, { "model":"microsoft\/phi-4", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3536429421 }, { "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.106947781 }, { "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.3169488071 }, { "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1479958867 }, { "model":"microsoft\/phi-4", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.3761797641 }, { "model":"microsoft\/phi-4", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.0312538317 }, { "model":"microsoft\/phi-4", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.2922637643 }, { "model":"microsoft\/phi-4", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0234894436 }, { "model":"microsoft\/phi-4", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.2225963414 }, { "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2171579973 }, { "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.456713607 }, { "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1681044686 }, { "model":"microsoft\/phi-4", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3440820027 }, { "model":"microsoft\/phi-4", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0298418752 }, { "model":"microsoft\/phi-4", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.2580610439 }, { "model":"microsoft\/phi-4", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.041095966 }, { "model":"microsoft\/phi-4", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.2930729253 }, { "model":"microsoft\/phi-4", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.0556571943 }, { "model":"microsoft\/phi-4", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.3548784075 }, { "model":"microsoft\/phi-4", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.066482798 }, { "model":"microsoft\/phi-4", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.3295363828 }, { "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1569501012 }, { "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4039420627 }, { "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1151073387 }, { "model":"microsoft\/phi-4", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3126448605 }, { "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.095240952 }, { "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.3840548344 }, { "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.1876367188 }, { "model":"microsoft\/phi-4", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.4310988737 }, { "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.1916153649 }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.4266920518 }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.0532026402 }, { "model":"microsoft\/phi-4", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.2442653709 }, { "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.0233838479 }, { "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.2071232952 }, { "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0030055044 }, { "model":"microsoft\/phi-4", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.1680771697 }, { "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.1169562212 }, { "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.4547809891 }, { "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2167266047 }, { "model":"microsoft\/phi-4", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.4629060689 }, { "model":"microsoft\/phi-4", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.2546126219 }, { "model":"microsoft\/phi-4", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.4840060449 }, { "model":"microsoft\/phi-4", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.3171866034 }, { "model":"microsoft\/phi-4", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.5752285995 }, { "model":"microsoft\/phi-4", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.1236158233 }, { "model":"microsoft\/phi-4", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.3922493462 }, { "model":"microsoft\/phi-4", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.1783375751 }, { "model":"microsoft\/phi-4", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.4003787241 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.3642023499 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.5697992815 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.4959810553 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7232313255 }, { "model":"microsoft\/phi-4", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.1732534835 }, { "model":"microsoft\/phi-4", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.4434970776 }, { "model":"microsoft\/phi-4", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3605235101 }, { "model":"microsoft\/phi-4", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5849733787 }, { "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.0868573088 }, { "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.3068232268 }, { "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.0883262705 }, { "model":"microsoft\/phi-4", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3294670602 }, { "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.1921595243 }, { "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.4381909531 }, { "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2102834142 }, { "model":"microsoft\/phi-4", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.503934087 }, { "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.1763652726 }, { "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.4428784232 }, { "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.3772793055 }, { "model":"microsoft\/phi-4", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.5820724576 }, { "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0067893116 }, { "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2007893146 }, { "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0132875082 }, { "model":"microsoft\/phi-4", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1300679396 }, { "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.1581139234 }, { "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.3992847318 }, { "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1006122628 }, { "model":"microsoft\/phi-4", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.3529986856 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0310807341 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.2470826922 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0634023566 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.2955476351 }, { "model":"microsoft\/phi-4", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.2788689746 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5417455941 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.2369610218 }, { "model":"microsoft\/phi-4", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.3765795877 }, { "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1094117889 }, { "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.3715115564 }, { "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0627224628 }, { "model":"microsoft\/phi-4", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3120135336 }, { "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.1148528139 }, { "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.3969632133 }, { "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2004789157 }, { "model":"microsoft\/phi-4", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.4541545495 }, { "model":"microsoft\/phi-4", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.0855471394 }, { "model":"microsoft\/phi-4", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.3720740561 }, { "model":"microsoft\/phi-4", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.284365864 }, { "model":"microsoft\/phi-4", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.589202199 }, { "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0344910359 }, { "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.2176253825 }, { "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0020930717 }, { "model":"microsoft\/phi-4", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.0498013123 }, { "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.0788515324 }, { "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.3023201397 }, { "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.0177354807 }, { "model":"microsoft\/phi-4", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.2429763441 }, { "model":"microsoft\/phi-4", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.1849581121 }, { "model":"microsoft\/phi-4", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4562979327 }, { "model":"microsoft\/phi-4", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2591551222 }, { "model":"microsoft\/phi-4", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5311194854 }, { "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1462066826 }, { "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4299215293 }, { "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2371087689 }, { "model":"microsoft\/phi-4", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4064089202 }, { "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1102793601 }, { "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.3718502317 }, { "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.0999035402 }, { "model":"microsoft\/phi-4", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.3664761129 }, { "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.0537848954 }, { "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.3139411656 }, { "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.0666822222 }, { "model":"microsoft\/phi-4", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.3622322436 }, { "model":"microsoft\/phi-4", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0753739979 }, { "model":"microsoft\/phi-4", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.3445771251 }, { "model":"microsoft\/phi-4", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.1701733674 }, { "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.104202131 }, { "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.3590734072 }, { "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.0705324379 }, { "model":"microsoft\/phi-4", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.3025321109 }, { "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1107444823 }, { "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.3916459404 }, { "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.1649626358 }, { "model":"microsoft\/phi-4", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2656552119 }, { "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0539171508 }, { "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2436825008 }, { "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1581276083 }, { "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.1658903033 }, { "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.4217933103 }, { "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.0803859812 }, { "model":"microsoft\/phi-4", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.3220461814 }, { "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.1018185799 }, { "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.3881585962 }, { "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0840554004 }, { "model":"microsoft\/phi-4", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.3260852936 }, { "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.036255172 }, { "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.2361171448 }, { "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0369324798 }, { "model":"microsoft\/phi-4", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.3426601677 }, { "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.1675392326 }, { "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.4161590898 }, { "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.101823454 }, { "model":"microsoft\/phi-4", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.3124446375 }, { "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1012625471 }, { "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.3580813711 }, { "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.0891010327 }, { "model":"microsoft\/phi-4", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.3158325956 }, { "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.1211248924 }, { "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.389246098 }, { "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.2367123999 }, { "model":"microsoft\/phi-4", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.5273473365 }, { "model":"microsoft\/phi-4", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1262296798 }, { "model":"microsoft\/phi-4", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.3480250641 }, { "model":"microsoft\/phi-4", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.104091386 }, { "model":"microsoft\/phi-4", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.3135377948 }, { "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.1488007297 }, { "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.4132412315 }, { "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.0478735067 }, { "model":"microsoft\/phi-4", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.3069618299 }, { "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.1523025562 }, { "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4096891017 }, { "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2469695748 }, { "model":"microsoft\/phi-4", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.524876157 }, { "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0369494139 }, { "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2350129808 }, { "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0015518794 }, { "model":"microsoft\/phi-4", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1140211549 }, { "model":"microsoft\/phi-4", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0103766134 }, { "model":"microsoft\/phi-4", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1821363344 }, { "model":"microsoft\/phi-4", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0003856632 }, { "model":"microsoft\/phi-4", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.0985339751 }, { "model":"microsoft\/phi-4", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1279029727 }, { "model":"microsoft\/phi-4", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.3504496172 }, { "model":"microsoft\/phi-4", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1158076498 }, { "model":"microsoft\/phi-4", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.3583374616 }, { "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3550414512 }, { "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.5626107823 }, { "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.2784963846 }, { "model":"microsoft\/phi-4", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.4121299981 }, { "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1049411882 }, { "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.371724232 }, { "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2126550777 }, { "model":"microsoft\/phi-4", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.4754992095 }, { "model":"microsoft\/phi-4", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.1813353123 }, { "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.4632560004 }, { "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3450201321 }, { "model":"microsoft\/phi-4", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.5827805827 }, { "model":"microsoft\/phi-4", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.1323104842 }, { "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.3747307468 }, { "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.2733723845 }, { "model":"microsoft\/phi-4", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.5057937589 }, { "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.164734586 }, { "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4400610126 }, { "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.2884407046 }, { "model":"microsoft\/phi-4", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5338739518 }, { "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0202859007 }, { "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.2084128437 }, { "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0129709626 }, { "model":"microsoft\/phi-4", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.1407028363 }, { "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.0476641683 }, { "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.1691869095 }, { "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0111247819 }, { "model":"microsoft\/phi-4", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.151377306 }, { "model":"microsoft\/phi-4", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0307653909 }, { "model":"microsoft\/phi-4", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.2382457281 }, { "model":"microsoft\/phi-4", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0240096696 }, { "model":"microsoft\/phi-4", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.154444722 }, { "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0116292791 }, { "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.1897831748 }, { "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.000876482 }, { "model":"microsoft\/phi-4", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.066397943 }, { "model":"microsoft\/phi-4", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0221364496 }, { "model":"microsoft\/phi-4", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.2036637198 }, { "model":"microsoft\/phi-4", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0300378344 }, { "model":"microsoft\/phi-4", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.146034089 }, { "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.1551096033 }, { "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4297549368 }, { "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.200397515 }, { "model":"microsoft\/phi-4", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.4351193348 }, { "model":"microsoft\/phi-4", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.0631852964 }, { "model":"microsoft\/phi-4", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3127999721 }, { "model":"microsoft\/phi-4", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0328870671 }, { "model":"microsoft\/phi-4", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.285042966 }, { "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.1749499193 }, { "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.4691275614 }, { "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.2454574882 }, { "model":"microsoft\/phi-4", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.5348019826 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.1325294802 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.4051925402 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.1631216823 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.4696161488 }, { "model":"microsoft\/phi-4", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.1338729952 }, { "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.3640492116 }, { "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.1115670494 }, { "model":"microsoft\/phi-4", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.4303510763 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.2374723306 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.4675485501 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.2528444882 }, { "model":"microsoft\/phi-4", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.4794045124 }, { "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.00699528 }, { "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.2382738034 }, { "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.0018437478 }, { "model":"microsoft\/phi-4", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.2225002567 }, { "model":"microsoft\/phi-4", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.1140490825 }, { "model":"microsoft\/phi-4", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.3668015685 }, { "model":"microsoft\/phi-4", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.1317498141 }, { "model":"microsoft\/phi-4", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.2770372268 }, { "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0342955291 }, { "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.2066101372 }, { "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0101983319 }, { "model":"microsoft\/phi-4", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0539238863 }, { "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.1209396556 }, { "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.3409074931 }, { "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.270580586 }, { "model":"microsoft\/phi-4", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.537606561 }, { "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.1285741979 }, { "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4057727321 }, { "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.1865914948 }, { "model":"microsoft\/phi-4", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.4565231191 }, { "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0177160721 }, { "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1342994379 }, { "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0010895392 }, { "model":"microsoft\/phi-4", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.0283042279 }, { "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.1155003818 }, { "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.3250077925 }, { "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.1195513435 }, { "model":"microsoft\/phi-4", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.3158904676 }, { "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.0676473408 }, { "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.2672641675 }, { "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.0166808106 }, { "model":"microsoft\/phi-4", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.2361978954 }, { "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.1452425625 }, { "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.3941195385 }, { "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.2680553268 }, { "model":"microsoft\/phi-4", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5055559664 }, { "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0218129891 }, { "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2316696377 }, { "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0005135911 }, { "model":"microsoft\/phi-4", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.0644762753 }, { "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.0721904827 }, { "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.3322122834 }, { "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0963078281 }, { "model":"microsoft\/phi-4", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1506583582 }, { "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0658670408 }, { "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.2264957148 }, { "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0240888197 }, { "model":"microsoft\/phi-4", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.1589846026 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0361610953 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2209617429 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0152526027 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.1066841292 }, { "model":"microsoft\/phi-4", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1360263411 }, { "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4135302369 }, { "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1427052583 }, { "model":"microsoft\/phi-4", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2190638456 }, { "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1516445239 }, { "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4517979691 }, { "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2058198052 }, { "model":"microsoft\/phi-4", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.2799124898 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0158837296 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.2111229219 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0440027048 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.1856579938 }, { "model":"microsoft\/phi-4", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.1381244544 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.3106084366 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1319133586 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.2261398681 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0101349522 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.0727946226 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0087636854 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.0326918009 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0143438883 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.0651139855 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0145699741 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.1077126314 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.3303312588 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.0568249639 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.1985159581 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.1428907436 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.3107041775 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.1849770017 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.2932088535 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0246413933 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.2220905764 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.0462912201 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.2082310898 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.021812522 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.1700035697 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.0687018163 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.213092048 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.0516867052 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.2165108464 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0097728449 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.1483692036 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.08262787 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.281005553 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.0481979333 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.2232523474 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0805820584 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.2555758551 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.0296286693 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.1319853113 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.0173366455 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.220616462 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.0065148659 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.1526491803 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.0459721625 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.2229551601 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.0233985631 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.1665184954 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.0355167863 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.2600874171 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.0323184525 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.1970289791 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.0892751266 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.2388608153 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.0416926889 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.1552666429 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.020551822 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.1292684598 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.0744822177 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.0527597248 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.3026154166 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.0777044688 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.3081482084 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.1237340737 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.3601104142 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.1371705946 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.4120757797 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.0837672025 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.2648038016 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.0885028071 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.2007646735 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.3548422361 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.460765953 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.4690424472 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.6788013861 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.1169662945 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.3242693179 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.2556403143 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.4583071754 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.0271486292 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.2182731449 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.0526574176 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.2424108963 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.0762125847 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.2228549327 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.0269063649 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.2401408344 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.0839707225 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.3074010094 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.2478840637 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.4205657928 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0290727628 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1841843114 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0523495621 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1231670583 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.0005257422 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.0721265952 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.000262224 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.0118348356 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0318398305 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.1834830244 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0320718253 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.1032515167 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.1989310744 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.408792844 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.2107266229 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.3722535388 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.0223204074 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.1959765545 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.022115131 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.1882969266 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.1149995432 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.3052316233 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.1017437337 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.2651695911 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.1251179936 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.3078536626 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.1049757961 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.338086632 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0185191424 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.1790132896 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0151653031 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.0537338226 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.0341024751 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.2126115238 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.0221151729 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.1431429685 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.1041933329 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.3064701129 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.1299185029 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.3570513672 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1170990874 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.3281623219 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.1166577127 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.2303280443 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.0192945074 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.2015068169 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.045857499 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.1778848232 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.0278653757 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.2309769046 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.0264488684 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.1618433519 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0187368299 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.210610547 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0070803381 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.0602951272 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.0343738545 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.1971697601 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.012430185 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.0969965616 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1011791445 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.2665626277 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.0674482283 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.1439352867 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0333812973 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2140071833 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.013803565 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.0856760144 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.0857349903 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.2682295704 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.0363984536 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.2133514375 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.0831966089 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.3252283455 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0100264548 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.2049643183 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0402775114 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.2132968488 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0178624704 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.1584836987 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.044306682 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.2110608123 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.0101250707 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.1446641679 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.0250471784 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.2097577846 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.034382114 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.2211758055 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.120023798 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.3039131897 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.1137229069 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.3446031673 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1101780964 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.2424045636 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0971253665 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.1223804901 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.0522706053 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.2509451803 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.0404811569 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.2098515398 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.1127735687 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.3096427976 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.1171995651 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.39693057 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0249203424 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.183758763 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0148302605 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1329930306 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0171568718 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1561109456 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0097264241 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.0452833915 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.0554840251 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.1538079363 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0033288372 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.036508675 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3055395757 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.4480585816 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.2165906221 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.3271537328 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1017362354 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.2782010079 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.1041302213 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.2865629267 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.1031395116 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.3223915745 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.1194174782 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.3618255907 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.0901332073 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.2638668804 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.1408494847 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.2546101322 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.0826481083 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.2913230821 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.1808682916 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.3815777762 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0012309971 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.0385987025 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0093358773 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.0457261214 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.0448599501 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.1691371082 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.0527194634 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0187725283 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.1451005114 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0072043177 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.0622904587 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.015192186 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.1562018554 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0146518601 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.0664358997 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0168598973 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.1350682776 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0192034206 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.1607323446 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.0538059584 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.2453781212 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.0917605905 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.2668905804 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.0217870696 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.2165031068 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0464674805 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.2235940604 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.1007032416 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.2719560518 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.1266242057 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.3334538145 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.0422003709 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.253591842 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.0280729387 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.1884927612 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.0366379898 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.1847934746 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.0141355453 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.1724636201 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.1550101498 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.3623113506 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.1217984824 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.2801870917 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.0606788965 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.1688995018 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.0007119113 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.0386741345 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.0853746951 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.3009803927 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.0676677726 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.2051763344 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0118399471 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.0818929883 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0131103824 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.1075252941 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.2810155518 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.0683323294 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.2657241512 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.0921333598 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.2640680177 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.1042804602 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.257176459 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0453126073 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1567880475 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.027790575 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.0855724163 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.096255918 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.2490196736 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.0759551519 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.2765897266 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.030810794 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.1804383237 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.0151653031 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.0766086067 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.1578714698 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.3784433754 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.1713340477 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.3260532752 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0354904515 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.1880266806 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0178986288 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.0724032398 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.0482308543 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.2087387992 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0574307954 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1098000711 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0118398272 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.1505828307 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.008719744 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.056668863 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0200324188 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.1553578618 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0095466427 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.0491504248 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.0513648793 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.2577830867 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.119690435 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.1721639976 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.0821079546 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.3164863838 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.1265931852 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.1793067232 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0115347204 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.1485833844 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0093856962 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.0608458885 }, { "model":"microsoft\/phi-4-multimodal-instruct", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.1520421573 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.3707336059 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.0325845731 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.1866364833 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.049235994 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.1875704973 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0203716729 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.1964052359 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0457848104 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.1635760551 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0043114209 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0733956093 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.0844832543 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.2899357726 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.0202296618 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.1170466993 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.1314009634 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.3827163755 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.0953897712 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.2894343613 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0542604747 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.2839533373 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.0103488851 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.1022931459 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1005104859 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.3241333261 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.0397410561 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.1986373033 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.0370753847 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.2466649661 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0104943059 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.1703228075 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.1181763987 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.3010924314 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.0774366468 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.232244564 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0700327695 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.2793919522 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.04780178 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.1986318307 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.0761832692 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.3293090829 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.0940299872 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.3022965125 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1421502617 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.3249063292 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.044984749 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.1531327249 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.1016298945 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.2980803254 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.0978160022 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.2190252958 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.0784827192 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.3301673127 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.0942154389 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.3318581823 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.0227564483 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.1618637003 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0041151275 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.052321141 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.1376994092 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.3446006208 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.1126650404 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.3592815418 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.1786073211 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.418923403 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.1529904036 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.4257110482 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.0881428767 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.2731602409 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.1667346071 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.3197259125 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.3166864072 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.5483508218 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.4404172544 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7231001513 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.1298121807 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.3403579227 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3743863952 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5971283997 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.1016894588 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.3636401028 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1695199459 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3986853323 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.1164300835 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.3215620941 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.1452350029 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.4128118494 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.1078563354 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.3207926618 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.3659011486 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.580998869 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0208055886 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1775856129 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0303702553 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1647346597 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.0897802232 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.3334021167 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.0546926081 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.2631919591 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.038636598 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.1770095402 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0050909961 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.1811657432 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.1271878224 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.3698932868 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.1324625901 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.3233634009 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.0809118708 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.2537010038 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.022242601 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.2086622767 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.1216394809 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4062520998 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.1488006127 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.3814897068 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.1489627056 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.4172638299 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.2008824981 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.5185852751 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0505138835 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.1914888261 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0070198993 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.1690394526 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.0982399037 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.2845447958 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.0238061486 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.2070418144 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.1148524922 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.3722842281 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.250138544 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5338430631 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1169165949 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.3638899173 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.1330552123 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.2685952079 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.0823011221 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.2825939861 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.0327082346 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.2319054893 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.044667859 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.2691000298 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.0732644907 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.3296633392 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0353609299 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.1909025949 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0077167113 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.1386174808 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1165534681 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.3877914341 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1033665849 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.3638806009 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1314926141 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.3540405018 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.1705869429 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2612780395 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0158033007 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.1802186885 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.004737288 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1665989397 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.0968803629 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.3271235347 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.0742021289 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.1811100359 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.0830883828 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.2590209016 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0473984845 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.2714023791 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0436065244 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.1616079019 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0321839146 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.2872152251 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.1039755938 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.3670583743 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.0490353313 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.3016997477 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1016737952 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.3529445259 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.0980137705 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.3076980329 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.2051934522 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.4174861616 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.1030714956 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.3069420156 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1062506996 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.3311804385 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0661499319 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.2408635082 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.1305087747 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.336550146 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.0829900967 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.3039678683 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.1166543201 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.3031041679 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.200304354 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.4780661009 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0265108253 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.1166554461 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0293332904 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1849240696 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0177577979 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1411822431 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0101970078 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.1613941454 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.0446786865 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.1562050743 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0022538162 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.0960982382 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3158857772 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.5109022919 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.2903521386 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.4758823803 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1198559998 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.3492711529 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2480316528 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.4488014348 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.1398701241 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.3034565852 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3460979115 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.5835851988 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.1130151873 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.3818050844 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.1838222494 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.416344125 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.0947682488 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.3259777135 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.1796274314 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.4360781177 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0300958323 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.2021438397 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0097834933 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.1744531846 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.0299661217 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.204544657 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0009238366 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.0843772457 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.031724087 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.16451202 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0046124791 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.1085913002 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.017437495 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.142035614 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0146532378 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.1923028552 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0516240546 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.153428686 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0121002424 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.1697462625 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.115711536 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.3597872407 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.1694354423 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.4167060912 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.0407632458 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.2238900502 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0714251247 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.2764333203 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2208693059 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.4679683611 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3234795754 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.5608576982 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.0669163701 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.2784916366 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.0633186191 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.3074668268 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.0917433239 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.3851148557 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.1299606269 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.3428948363 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.1471870965 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.3392441061 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.2060450795 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.3717773766 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.0594528699 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.1988219607 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.1247215313 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.1079848157 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.2801778291 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.1131365873 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.2713637811 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0118794667 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.1249552242 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0507921341 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.1097631082 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.3352988316 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.0795019275 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.2884455353 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.0837893895 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.2809963487 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.2215012201 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.4551853935 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0345153294 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1364890072 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0012941396 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1092334478 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.0522713846 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.3192866676 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.0868686952 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.2859772299 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.0689558305 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.22853185 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.0415402981 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.2227329297 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.0649160569 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.2830042558 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.0350138164 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.2206802597 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0588767323 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2123528181 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0134096062 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1594472691 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.0908263331 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.3394219762 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.0645497034 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0478741208 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.1576256072 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0235106256 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.1852540612 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0235724586 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.1570768217 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0152437624 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.1214492647 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.0740063452 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.2757375638 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.0872564614 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.1427963743 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.0992800287 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.324429867 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.09185491 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.1769207611 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0494841031 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.1999996494 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0196415161 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.1910730769 }, { "model":"mistralai\/mistral-nemo", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2867784698 }, { "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.5037863792 }, { "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2421610142 }, { "model":"mistralai\/mistral-saba", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4148106883 }, { "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0787707917 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.218746848 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0187531501 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.1112723085 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0462314764 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.1434072436 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0132821079 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0456848091 }, { "model":"mistralai\/mistral-saba", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2054466179 }, { "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4635698598 }, { "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2513408047 }, { "model":"mistralai\/mistral-saba", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.5078766295 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.278999196 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5072892325 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.335915232 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5453940527 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1240264763 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.3914345538 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1767788852 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3838449247 }, { "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2249154291 }, { "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4283700551 }, { "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2483121176 }, { "model":"mistralai\/mistral-saba", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4611825726 }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.1368466985 }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.3988973343 }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0337104112 }, { "model":"mistralai\/mistral-saba", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.2239889311 }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2836423323 }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.4827693819 }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1511392088 }, { "model":"mistralai\/mistral-saba", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3858872623 }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1923090312 }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.3973361244 }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1398741741 }, { "model":"mistralai\/mistral-saba", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4023712427 }, { "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1262645615 }, { "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.389349888 }, { "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.164496362 }, { "model":"mistralai\/mistral-saba", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.42344822 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2246336129 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4406538597 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1192868334 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3094007011 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2557484343 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.4816174974 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.336565743 }, { "model":"mistralai\/mistral-saba", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.500099888 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.1502069597 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.365503748 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.1518861892 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.3795025844 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.0716255326 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.2354287318 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0016024995 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.1289259809 }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2895060168 }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5088283599 }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.3502843148 }, { "model":"mistralai\/mistral-saba", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5851011111 }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.33652498 }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5394051209 }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4124704223 }, { "model":"mistralai\/mistral-saba", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6500309258 }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2440848305 }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4482066389 }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3683187834 }, { "model":"mistralai\/mistral-saba", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5300499022 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.4669071745 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6351301458 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5813419207 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8065247071 }, { "model":"mistralai\/mistral-saba", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.293714449 }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.4892518335 }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3400529578 }, { "model":"mistralai\/mistral-saba", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6054518089 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2392739698 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.4813714407 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2321648572 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4166342577 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.2811212879 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.4244631944 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2282664087 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5153269959 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2548863763 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5181895957 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4309072933 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6122951839 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0365190298 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1781763265 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0417146562 }, { "model":"mistralai\/mistral-saba", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.0914072868 }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.266641943 }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.4689143537 }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1714078748 }, { "model":"mistralai\/mistral-saba", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.45528413 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0639113657 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.1707631202 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0551968249 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.165412979 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3708164771 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5780452995 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3889665973 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5940361548 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.261843766 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4806474097 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0995040783 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3935925698 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.218205371 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4561374245 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2523642916 }, { "model":"mistralai\/mistral-saba", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5178642158 }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.2714445111 }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5033343062 }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3367134056 }, { "model":"mistralai\/mistral-saba", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6204206544 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0577998278 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.184470268 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0317976664 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.1311048104 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.097793149 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.2519860373 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.0683877466 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.255829494 }, { "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.26428669 }, { "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4902151754 }, { "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3239977856 }, { "model":"mistralai\/mistral-saba", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5865126635 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2208745982 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4944838309 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.1755723698 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.3733502483 }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1125731148 }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.2778916971 }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.0861201622 }, { "model":"mistralai\/mistral-saba", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.3310005151 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1995238484 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4335224538 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.172304501 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4246105774 }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0572277693 }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.2158207267 }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.0812368695 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2415284955 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4761318508 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2506029382 }, { "model":"mistralai\/mistral-saba", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4820610024 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2390442925 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4660486517 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.1889249825 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.314876296 }, { "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0673075407 }, { "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2078406147 }, { "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0194519734 }, { "model":"mistralai\/mistral-saba", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1011298866 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2922121087 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5037938788 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2113094586 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4865126178 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2362297066 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.4746614882 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.108285746 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.3934483867 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0484302224 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.2222317379 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0220530515 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.2506994166 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2936668736 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.4940538554 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1686173343 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4033123912 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.3097940645 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5304242832 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2168719994 }, { "model":"mistralai\/mistral-saba", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4555868419 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3030951939 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5195567075 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3655435175 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6292737269 }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.20731642 }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.414222781 }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0884087592 }, { "model":"mistralai\/mistral-saba", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.2678188556 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.2746115511 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5022730585 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1969099003 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4481037581 }, { "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.23536401 }, { "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4943928771 }, { "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3235114454 }, { "model":"mistralai\/mistral-saba", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5875209718 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0495608632 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2045968087 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0294620037 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1680044731 }, { "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0200154664 }, { "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1606834413 }, { "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0044245595 }, { "model":"mistralai\/mistral-saba", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.1054952984 }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1001204869 }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.2836152046 }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0445164582 }, { "model":"mistralai\/mistral-saba", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.2337334441 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4077844252 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6076754833 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4459003493 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5840266721 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2148436144 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.4670207413 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2898684366 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.525989117 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.264907032 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.4939362461 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4563355662 }, { "model":"mistralai\/mistral-saba", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6843169799 }, { "model":"mistralai\/mistral-saba", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.258396409 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.4912290692 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4713411152 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6517904546 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.1955652432 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4654058492 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.2591393679 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.4936043335 }, { "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0562734776 }, { "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.212197658 }, { "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0314083234 }, { "model":"mistralai\/mistral-saba", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.1492631083 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.0890242869 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.2507686532 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.0807676975 }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0084519738 }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.151041875 }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0056901248 }, { "model":"mistralai\/mistral-saba", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.100130068 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0267041676 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.1555601794 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.018455165 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.1280213362 }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0558797598 }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.1672295272 }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0266323434 }, { "model":"mistralai\/mistral-saba", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.1344659816 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2165549669 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4902121608 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.2493024035 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.4391116426 }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1532491466 }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3330342559 }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0762662838 }, { "model":"mistralai\/mistral-saba", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.2740420072 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2875970952 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.4962250868 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.382073635 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6293993104 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.0845702794 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.2799479817 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.1327606257 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.4346855791 }, { "model":"mistralai\/mistral-saba", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2777158956 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4958635491 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2970897235 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5394670378 }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3805998732 }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5566308844 }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3811390337 }, { "model":"mistralai\/mistral-saba", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5895281984 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.0685553777 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.2845942287 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.0371906835 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.1797173863 }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2079294904 }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4320631023 }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2456928253 }, { "model":"mistralai\/mistral-saba", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.3989628007 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0154825384 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.1339811483 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0278445131 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2699521486 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.478882362 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.2654978305 }, { "model":"mistralai\/mistral-saba", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5487755246 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2622533206 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4831695415 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3002613398 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.4834403722 }, { "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0279025481 }, { "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.0843438607 }, { "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0224775292 }, { "model":"mistralai\/mistral-saba", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.1057554869 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2469989894 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4943114536 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2844546137 }, { "model":"mistralai\/mistral-saba", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.477210689 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.1727284585 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4250344787 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1817744295 }, { "model":"mistralai\/mistral-saba", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4656549066 }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2373174322 }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.4521152897 }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.2962197342 }, { "model":"mistralai\/mistral-saba", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.544285644 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0678548322 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.1995976377 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0410008999 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1410775666 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.1807443545 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4282740606 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1030562145 }, { "model":"mistralai\/mistral-saba", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1694260317 }, { "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0705660888 }, { "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.1999709116 }, { "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.005606616 }, { "model":"mistralai\/mistral-saba", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.0791302868 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0146140319 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.1546761245 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0066574272 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.0720455741 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1686505919 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4407452421 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1561257665 }, { "model":"mistralai\/mistral-saba", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2413348415 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2391219094 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4993680631 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2546682455 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3131256963 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0409238482 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.1759269251 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.040396663 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.1387970813 }, { "model":"mistralai\/mistral-saba", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2175149129 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4527759686 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1124798847 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.2758121544 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0499477269 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.161467557 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0061206295 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.1175310591 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.0682401612 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0028802187 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0506386945 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.1505032551 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4086092545 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.1882561377 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.3814760125 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.1878091774 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.4280751788 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.2100749947 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.3864616183 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.093266394 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.311215609 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1039989943 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.2907556954 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1020177653 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.3338664094 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1064323135 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.3080022567 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.0979045908 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.3174950846 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0117862293 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.1847940791 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2573591397 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.4309874046 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.0851522303 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.2284178182 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0818689903 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.2636451344 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.0746797577 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3147526037 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.0989261118 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.3536619814 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.1746758677 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.3604842775 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1908291186 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4048132215 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.0320570973 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.1921106676 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.1554049163 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.3557989532 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.2229681692 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.3722031872 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.2130798288 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.3738480621 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.1589337472 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.4334432444 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.037561576 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.1318206471 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.1217595648 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.210101514 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.4301134482 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.362165784 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5625700474 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.2836619572 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5189927538 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.3481484827 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.5703115876 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2310257801 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4431650209 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.2968021074 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.4607855577 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.4358717425 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6055153523 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6254747881 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7898596498 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2211802733 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.4673850088 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3141247128 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5664089061 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.1378005544 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.3634410941 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1662541754 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3656154799 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.2598161419 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.4673557809 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2352273865 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5423645035 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2637885864 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.50661739 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.3898419239 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.5983170279 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.030172991 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1791853335 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0012644122 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.0541809315 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.1879687767 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.3575768224 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1193068232 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.3258076554 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0424529379 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.205545596 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0315047557 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.1862281652 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.2931006661 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5117710763 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3454885653 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5632832845 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1423442665 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.3938289086 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0479071398 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.2390532358 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.1466679693 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4020226017 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2296543368 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.4623290904 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.1846129963 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.4218789485 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.2363785743 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.5715726858 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.042005649 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.1704312564 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0060037968 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.1052387436 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1206947602 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.2893596175 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.0376635554 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.2154020665 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.203133363 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4421827582 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2636104621 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5093890816 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1660483895 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4317128373 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2448276505 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4102738917 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1159703103 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.3116167676 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.0803083214 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.3001867634 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1346426707 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.3961827686 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.138703676 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.4106570721 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0501718274 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.2070022512 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0014765966 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.0534187009 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1342281856 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.3526105747 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.146553268 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.3291051456 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1737342381 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4232883693 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2383832092 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.33001113 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0707102369 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2324042355 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1164466909 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2572289084 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5125685183 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.1223672825 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.3023512099 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2098940087 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.443017936 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0762167285 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.2552721118 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0777950532 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.2573049595 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0666322315 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.3626993592 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2059798463 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.4231363675 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2059172406 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4148312305 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2216993022 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.4221894818 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.1386303624 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.3369477219 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.2392636803 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.4545241599 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3632780792 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6254141203 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.0217828279 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.144054989 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0351771663 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.2287244941 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.1464596557 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.3564056311 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1517063855 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4352500122 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2230448991 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4584787016 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2531295878 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5302964071 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0696320569 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2331307278 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0252073886 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1703033014 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0276939955 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1837469296 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0160221908 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.2144565152 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.083347512 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.2190646209 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0679191643 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.2873941526 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3647734864 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.5784168493 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3897396366 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5030239884 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1850936564 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.3999928464 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2726874239 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.4948927457 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2070601418 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.4601106145 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.412349088 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6102742767 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2142629544 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.4499459763 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.3895465667 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.5708848992 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.1710045162 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.433047449 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.2961144006 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5257430939 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.057729338 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.2177957601 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.0230572611 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.16449999 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.0484864486 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.1952223401 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0005975301 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.1227717162 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0165865489 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.1532087128 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.1038227782 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0457227327 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.2051868353 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0141586748 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.1192333436 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0421597981 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.1665679168 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0011074127 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.1483863351 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.1814266299 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4404144211 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.2821064012 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.492442613 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1500059372 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.385325025 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0190556256 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.201727165 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2063172621 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.4245629061 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.2871245672 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.5575367366 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.1106429776 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.3246322884 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.1586725311 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.4486820539 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.0861902503 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.2737902674 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.1910130331 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.4159297845 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.2631133201 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.504043761 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.2496114121 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.4707696336 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.0528559098 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.2067746551 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.0275980154 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.1880963665 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.1533969949 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.3718867563 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2858310833 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4323129392 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0536249593 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.190207113 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0308454815 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.1776801562 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.4036600408 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.2335702423 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.4992388897 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.1788374332 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4671147568 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.2348003993 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.4786253942 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.2 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0150184554 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1309010161 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0010839978 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.0719045729 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.1854021136 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4258067424 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.1388133394 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.3685419874 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.1877241389 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4060591516 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1165785177 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4074870036 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2204611632 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.4366931331 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.2957932526 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5127606293 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0530966299 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.183305815 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0017326575 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1225688999 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.1059358827 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.3291955196 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0116334446 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.0643182856 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.0665909516 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.2275785677 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0230978994 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.1160311087 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0299390587 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.1474455997 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.0919549448 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1503741808 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.3985964495 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1755859315 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2407951689 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1386896901 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.413832278 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2503925306 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3045084897 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0365820579 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.1964572986 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0044447951 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.1846817289 }, { "model":"mistralai\/mistral-small-3.1-24b-instruct", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0371320408 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.1862601893 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0232552001 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.1816122083 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0762582721 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.2256183152 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0155010137 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.111632655 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.1105492032 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.3824462343 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2049615052 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4519234477 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.1985490849 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.4116485218 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3336371818 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5253002356 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0933745535 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.3852051191 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.112918589 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3403937393 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1051779987 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.3391065166 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1966274075 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.3960585372 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.0575768902 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.3040332139 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0290897017 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.2669483396 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.1716262856 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.4261716241 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1464539147 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3455385109 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0835422268 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.3248882933 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.0978478358 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3301671275 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1701727662 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4313256486 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.122889461 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.289913907 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.1281858401 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.3858938936 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.125191978 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.3696701209 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.2520982183 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.4444681724 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3102736093 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.5214898195 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2303251904 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.450613459 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2426993481 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.4712451818 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.2626874911 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.4988486171 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.3809175562 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6066039572 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.1697631286 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.3535802564 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.2098247736 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.417434594 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.399751444 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.5723628973 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.464784706 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.6741611276 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2233867986 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.4625939523 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.361048469 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5667561181 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.1859289486 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.4402215767 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.0783879247 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.2777670309 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.2111316415 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.4343589207 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.1904775276 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.4788196159 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2014604354 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.4639286173 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.3927528149 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.5874921326 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0768136914 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.1865808917 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0163860397 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1761153537 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.1323783916 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.3850340086 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1060744828 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.3580675535 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0737222138 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.2500218213 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0721259007 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.2931833463 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3206299694 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5380529839 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.292022826 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5099805952 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1310248624 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4057643378 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0541491102 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.263637102 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2005919962 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4635609134 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2668215975 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.4987236442 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.1674187488 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.4422221563 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.2922430013 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.5510140576 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0555362323 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.236381065 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0170051195 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.1450260585 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.1928019801 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4582860792 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2733663358 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5231933614 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1843295265 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4175841484 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.247062292 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.3738750801 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1248080013 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.3323730185 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.1222608237 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.3755543507 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.0542399326 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.2976203376 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.0603448772 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.3011538751 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0423245128 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.2646605638 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0030665166 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.1617644115 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.142542051 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.3941172286 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.095018815 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.3690882139 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1298139392 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.3909547555 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2582727386 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3442557032 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2128159963 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.4639121691 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.1877293722 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4156064229 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.1522348659 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.4142435328 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0938946347 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.3116778843 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0549301185 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.2672873596 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0463756582 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.3097498513 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.1141453782 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.3477667157 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1057291821 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.3103268517 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1480972279 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.3846594696 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.1588893829 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.3218843951 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.217708728 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.449213988 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3422949582 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.5811761531 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1184833265 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.2686318029 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.1103891214 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.2937298939 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.192171828 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.4057435234 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1172467131 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.375586286 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.1863008756 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4262401563 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2219365699 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.4997896782 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0832085938 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2560508851 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0245166671 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1971989167 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.030893556 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1822055745 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0023241318 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.1765743592 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.0743696949 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.3048835131 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0867109239 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.3141207717 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.2585423604 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.5140115555 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3275813302 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5154143201 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1873357797 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.4495400323 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2381984934 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.4592277795 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2145991028 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.4593715469 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3539022205 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.5923278871 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.1770834914 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.408612856 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.429961987 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.5972964968 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.1435401219 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4105586063 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.2886257739 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5087363637 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.0740797406 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.2542118208 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0601341974 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.1994352479 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0285852473 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.2160067741 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.014651722 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.1375629789 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0391982932 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.1784853107 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0244050078 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.2037164659 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0803995043 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.3185143496 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0756351517 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.3153237514 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.1706373545 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4266803456 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.2642729747 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.4811936124 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1231167016 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3066261581 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.100703346 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.3394192326 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2316592529 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.4448279614 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3469148634 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.5817285551 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2029959378 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.4619706712 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.266835444 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.5363657682 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.0974405375 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.3189571047 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.1013427217 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.40431727 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.2525105285 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.4602158898 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.1953888501 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.4178007058 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.1343983036 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.3632350324 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2056905071 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.3721609069 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.1967376366 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.4299666079 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.2677982301 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.4953792654 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.1687607729 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4201489822 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.2722608778 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.4873229562 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.1608191811 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4194174213 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.1610922206 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.3629853655 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.1209426537 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.3088806755 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1161087561 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.3498111478 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.1204645669 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.3876320563 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.1940449441 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.4193086485 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.0921504626 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.3200787292 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0569972002 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1268003169 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0423997321 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2100045407 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0262399026 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.1399469356 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1264279499 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.3856755463 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.0962471892 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.186203302 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1759566918 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4501489751 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2279432688 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.2931038513 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1133170987 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.3072773582 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0779267738 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.3132146793 }, { "model":"openai\/gpt-3.5-turbo-0613", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0878643961 }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.2943661311 }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0773692656 }, { "model":"openai\/gpt-4.1", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.3411692596 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1621142099 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.4428263457 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.1504890085 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.2959385484 }, { "model":"openai\/gpt-4.1", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2484834927 }, { "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5412047755 }, { "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2278733475 }, { "model":"openai\/gpt-4.1", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4958526675 }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.265308921 }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.534633443 }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.4133349725 }, { "model":"openai\/gpt-4.1", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.6096991153 }, { "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1444098549 }, { "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4354187609 }, { "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.2001187188 }, { "model":"openai\/gpt-4.1", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4423462053 }, { "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.2150495101 }, { "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4509543639 }, { "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2997633261 }, { "model":"openai\/gpt-4.1", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4845814777 }, { "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.2491524665 }, { "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4859498791 }, { "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.138136017 }, { "model":"openai\/gpt-4.1", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3570715701 }, { "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.3489496041 }, { "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5581870497 }, { "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1361506132 }, { "model":"openai\/gpt-4.1", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3888910906 }, { "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1613185111 }, { "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4197419896 }, { "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1539862054 }, { "model":"openai\/gpt-4.1", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4377056399 }, { "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2501533038 }, { "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.5157072708 }, { "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1995636011 }, { "model":"openai\/gpt-4.1", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.4138783532 }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.3114258781 }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5457791444 }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3254574442 }, { "model":"openai\/gpt-4.1", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5346247566 }, { "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.399240538 }, { "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.6190587277 }, { "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3751238401 }, { "model":"openai\/gpt-4.1", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6143783892 }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.3217774713 }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5782452692 }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.4138800821 }, { "model":"openai\/gpt-4.1", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.6092456527 }, { "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3464595133 }, { "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5867734529 }, { "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.477316562 }, { "model":"openai\/gpt-4.1", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6811707635 }, { "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2881859392 }, { "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5283141363 }, { "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.2909203719 }, { "model":"openai\/gpt-4.1", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5023060375 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.492992017 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6758612579 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6212437369 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8160680265 }, { "model":"openai\/gpt-4.1", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.3073587665 }, { "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5589719771 }, { "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.4007144936 }, { "model":"openai\/gpt-4.1", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6428162124 }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.3047473913 }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.541913317 }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2033525098 }, { "model":"openai\/gpt-4.1", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4483666995 }, { "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3915612434 }, { "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.6080295028 }, { "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.3199141865 }, { "model":"openai\/gpt-4.1", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.588811105 }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.301906911 }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5589917916 }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4922424861 }, { "model":"openai\/gpt-4.1", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6881055928 }, { "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0362803832 }, { "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2412638087 }, { "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0590184507 }, { "model":"openai\/gpt-4.1", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.2182867648 }, { "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.293318961 }, { "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.5302757414 }, { "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1439576296 }, { "model":"openai\/gpt-4.1", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4388678133 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.2178256702 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4424716551 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2443077504 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5192699912 }, { "model":"openai\/gpt-4.1", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3513424619 }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5811151557 }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3856423281 }, { "model":"openai\/gpt-4.1", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6039414456 }, { "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2481480247 }, { "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.508486097 }, { "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1531335794 }, { "model":"openai\/gpt-4.1", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3911815819 }, { "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2876998483 }, { "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5628772937 }, { "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3576201412 }, { "model":"openai\/gpt-4.1", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.6153922032 }, { "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.3584275831 }, { "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.6013859082 }, { "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3499912941 }, { "model":"openai\/gpt-4.1", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6402792518 }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.2060192505 }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.4844534641 }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1961860496 }, { "model":"openai\/gpt-4.1", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4729428536 }, { "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2912028765 }, { "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5457371537 }, { "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3511828155 }, { "model":"openai\/gpt-4.1", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.6085037742 }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2166158629 }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.5075364476 }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2931058111 }, { "model":"openai\/gpt-4.1", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4502993046 }, { "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.3528030853 }, { "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5716064196 }, { "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2391235505 }, { "model":"openai\/gpt-4.1", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5839078959 }, { "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.2190440582 }, { "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4908662007 }, { "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.3214797925 }, { "model":"openai\/gpt-4.1", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5762282439 }, { "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.2706398193 }, { "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5473511459 }, { "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1856132097 }, { "model":"openai\/gpt-4.1", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3901344593 }, { "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2760595824 }, { "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.523164531 }, { "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.3130393907 }, { "model":"openai\/gpt-4.1", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.5247440023 }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1776114575 }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4741587712 }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2067364767 }, { "model":"openai\/gpt-4.1", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3116379221 }, { "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3601446012 }, { "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5969111652 }, { "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2296100147 }, { "model":"openai\/gpt-4.1", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4744292053 }, { "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.3243424349 }, { "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5709461451 }, { "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1272378515 }, { "model":"openai\/gpt-4.1", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4523606053 }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2917695916 }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.5143075365 }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.2866152436 }, { "model":"openai\/gpt-4.1", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5663273613 }, { "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.3202923873 }, { "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5629214829 }, { "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2000751863 }, { "model":"openai\/gpt-4.1", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4730887312 }, { "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.247457636 }, { "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.5269197766 }, { "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2884569727 }, { "model":"openai\/gpt-4.1", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.5016795899 }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3751686059 }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.6197326636 }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.4401130744 }, { "model":"openai\/gpt-4.1", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.7145000136 }, { "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.3303579297 }, { "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.549332604 }, { "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2328260511 }, { "model":"openai\/gpt-4.1", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.5032600779 }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.307994769 }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5645741484 }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2521628085 }, { "model":"openai\/gpt-4.1", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.5130367104 }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2706764356 }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5187692381 }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3256395629 }, { "model":"openai\/gpt-4.1", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6002234371 }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.2193020818 }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.4750942093 }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1102557203 }, { "model":"openai\/gpt-4.1", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4682292826 }, { "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.1206079965 }, { "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3822546587 }, { "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0450954747 }, { "model":"openai\/gpt-4.1", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3804747142 }, { "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.2759362863 }, { "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.521953003 }, { "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1651830786 }, { "model":"openai\/gpt-4.1", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4234486928 }, { "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.4189225146 }, { "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6647373749 }, { "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4787138393 }, { "model":"openai\/gpt-4.1", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.6097836343 }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2802625681 }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5439670195 }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3443088412 }, { "model":"openai\/gpt-4.1", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5816388936 }, { "model":"openai\/gpt-4.1", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3203407715 }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5533544406 }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4490877978 }, { "model":"openai\/gpt-4.1", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6695132668 }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2794082054 }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5408123233 }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4984350217 }, { "model":"openai\/gpt-4.1", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6700105545 }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2317861129 }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5199717777 }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3286463098 }, { "model":"openai\/gpt-4.1", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5704087395 }, { "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3269082527 }, { "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5720782047 }, { "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2945581276 }, { "model":"openai\/gpt-4.1", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4770478865 }, { "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.2237147063 }, { "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4928025786 }, { "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.2213737985 }, { "model":"openai\/gpt-4.1", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.4089512188 }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0991434845 }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3451095887 }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1515589229 }, { "model":"openai\/gpt-4.1", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.5015201773 }, { "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2407783488 }, { "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4696462601 }, { "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1992814962 }, { "model":"openai\/gpt-4.1", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4739682422 }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2962617057 }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5718773299 }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.3246138439 }, { "model":"openai\/gpt-4.1", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.539231236 }, { "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2138222548 }, { "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4678880839 }, { "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1812913523 }, { "model":"openai\/gpt-4.1", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4862460633 }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2736390873 }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5516496981 }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3409932056 }, { "model":"openai\/gpt-4.1", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6325116451 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.3084306564 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.5446161895 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.4087794747 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.669062824 }, { "model":"openai\/gpt-4.1", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2266585274 }, { "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.5043938863 }, { "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2800009794 }, { "model":"openai\/gpt-4.1", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.5340783161 }, { "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3852899552 }, { "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.6247940844 }, { "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3365460818 }, { "model":"openai\/gpt-4.1", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5508261106 }, { "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2949417989 }, { "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.5355554723 }, { "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3946124626 }, { "model":"openai\/gpt-4.1", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5531143677 }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2730640179 }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.539343275 }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.367297377 }, { "model":"openai\/gpt-4.1", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.6209268292 }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2800024381 }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.5593725229 }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3549515665 }, { "model":"openai\/gpt-4.1", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5522777328 }, { "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2068585944 }, { "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.5050627139 }, { "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2927501641 }, { "model":"openai\/gpt-4.1", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.484706219 }, { "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2209837875 }, { "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4853024301 }, { "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2062980634 }, { "model":"openai\/gpt-4.1", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5064032134 }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2809055533 }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5421068577 }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.4180012555 }, { "model":"openai\/gpt-4.1", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.6302564473 }, { "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.2097645573 }, { "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4732281256 }, { "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.119486019 }, { "model":"openai\/gpt-4.1", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1666195088 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.1280239382 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.360470667 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0762109546 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2828209251 }, { "model":"openai\/gpt-4.1", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1794305621 }, { "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4543396215 }, { "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1841660038 }, { "model":"openai\/gpt-4.1", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2628923071 }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.2064136736 }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.5051800847 }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2731019968 }, { "model":"openai\/gpt-4.1", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3107160924 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.278809167 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.5407280723 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.266135659 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5629331219 }, { "model":"openai\/gpt-4.1", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2198316321 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.4708151995 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2234579509 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4401488964 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1056657743 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.3249231698 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0618207736 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.340632844 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.1521950168 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.3956387285 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0953768122 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.2497030659 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2413546506 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.5235234652 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2271910382 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4908497482 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2566573338 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5183862763 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3901123396 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5771753105 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1301518556 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4290918442 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1797566847 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.4271851106 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1724971212 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4012455839 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2496844101 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4720007075 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.1595843783 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4348621346 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1540181476 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.351822758 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2979811644 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5341221534 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2462308641 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4432887674 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1840798833 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.4151816693 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1349305067 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.397143235 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.1644448391 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4629038808 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2550498255 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4544854197 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.2230536146 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.499651958 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.2227847146 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.40695057 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2819696539 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.5293077213 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3092254935 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.4661357412 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.365645255 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.5771393179 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.3274189601 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.5630354446 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.1379843601 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.3936670775 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.1018796158 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.3594406238 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2953385985 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5543620654 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.342919616 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5729115023 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.3052927761 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5504382993 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4246355556 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6487523813 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2853090403 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.5033746216 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.2976764649 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.4568078793 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5178458342 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6792020066 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6631992536 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8257245236 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2800331904 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5328441069 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.3927902573 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.628791549 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.274202443 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.5239221129 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.144419277 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.405746187 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3517517227 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.5678423102 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2861570496 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.567530869 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2950999056 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5638983665 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.5061822417 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6903823708 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0579371031 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2407036725 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0338899407 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1685773285 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2249525185 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.4726822454 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1646493878 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4071725376 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.2293529776 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.4521332467 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2554536105 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.5371463729 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3531906075 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.6060071382 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.399293733 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6132292528 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.2232750657 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4801269988 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1922860161 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.4363534921 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2647815263 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.5197043469 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3268056763 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5486126608 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.2559881532 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5349715693 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3900018149 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6494354052 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1469460203 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.3801695829 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1926475709 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4187626054 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1826483605 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.4686350803 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1078652833 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.421647984 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.3047636442 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5385736571 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3458314466 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5969984451 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2100828863 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4717405627 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2844229339 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4435245651 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2977682173 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.5413323701 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2537598479 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5629521778 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1646050237 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4508391233 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2752297553 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5180256955 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.2132140468 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.5000034068 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1292151863 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3392182289 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.2382712271 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4901100456 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2584591395 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4586627531 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1783139223 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4481556757 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.3002017818 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.3739762238 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0772118618 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2815494636 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.001488949 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.1588971491 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.3080966975 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.5628489014 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2981973224 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5193764902 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2802761469 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.5391751615 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.2042851472 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4615978684 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.243797007 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.4981055966 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.2038296766 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5453530515 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2649575888 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5072138807 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.186903033 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.411527522 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2358876365 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.4961149155 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2674122275 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4442281313 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.3443124421 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5824988714 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3763691574 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6619682382 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.2596129619 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.485235691 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2234699025 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4866737746 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.3085593402 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.5486177789 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2319189577 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4830752425 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2639124065 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.5166255119 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.342163716 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6089275595 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.158564127 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.4086927045 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1237632416 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4428640995 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.1266863364 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3723937215 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0507341481 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3372593565 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.2495546416 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.505250418 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1370990235 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4188964845 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3898113091 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.6349932626 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4218934881 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5699211354 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2238263799 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.5157447202 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3376407171 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5674744623 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2989764302 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5575461672 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4347143661 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6526848356 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2365245444 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5263187531 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4295443245 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6303158648 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2156464838 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4961661832 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3525318267 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5377697887 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.1358779492 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.4047237198 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.2528406351 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5425926629 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.3052635197 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.5444415164 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.319777613 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.5070316671 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.224754909 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.4942892862 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.1763683901 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.3592673643 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0579407228 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3218620552 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1045487932 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4627951581 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.2457303069 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.4607096598 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.2208751843 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.499725177 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2080392025 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.5029005766 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.3955111551 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5840966612 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2108703792 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4463761953 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1957956536 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5109625366 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.3026696791 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5540321116 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.379491342 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6341129937 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.2373352462 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.4812305289 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.3592771753 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.6196466978 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.2189074797 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.458549356 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2096038798 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.48158495 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3847076164 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5961555843 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3031970309 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.4938747459 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1923337483 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4073259848 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.1777393755 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4229927395 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.1973765077 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.478302799 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.390011731 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5192332126 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0956165324 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.3010660185 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0201085128 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.1279466164 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.3027350341 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5488245098 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3543135567 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5739783335 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2739321887 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.523898319 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3702945368 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5833117124 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0456473272 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1799246176 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0060102851 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.0643020373 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2282243664 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4878680978 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2701355148 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4485608146 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.2068814622 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4820023997 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1797290418 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5053214161 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2800966186 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5447813345 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3743034645 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5977965321 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0698928855 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2670199291 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0438604879 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.2188199264 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.169474795 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.4233016879 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1115736327 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1741292068 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1777667306 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.4381801577 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0807523022 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.4027285347 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0871363585 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.3153084592 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0712302827 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.266201042 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1722143774 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.451617464 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1362044502 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2472412788 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.211203078 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4926704854 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2808125016 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3349070044 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1986426867 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.4568960366 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.228999134 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.5092438205 }, { "model":"openai\/gpt-4.1-mini", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.1996215211 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.452996678 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.1894542228 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4104759123 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0772997859 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.2795137394 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0490092548 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2793012345 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0821675771 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.3261780265 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0581954137 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.2133137227 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2038935703 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4744865332 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.198427289 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4466553325 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2230716751 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.4868000305 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3431634646 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5539675011 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0936861 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4019740671 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1320458692 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3969132003 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.147678651 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.3870664018 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1925682475 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.424235974 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.1549108661 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.3657540248 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.1300687711 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3850114254 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2822625676 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.5120051075 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1956229389 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4165258378 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1020059939 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.3685311802 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1584050367 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.4078207292 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.129620916 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4193741335 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.1911430477 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.423978547 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1839199068 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4589371965 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1754477624 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3729187467 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2104863522 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.4534437048 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.2824626 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.4822940799 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3067568845 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.522767718 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.2603548365 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.5472674101 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.061613272 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.2738044534 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0495010223 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.2930209689 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2480975275 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.4908345188 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2780131154 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.5272272242 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.282506513 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5438865496 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.4220387975 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6440212985 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2138985353 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4787400928 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3347502447 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.5277403226 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.4926163025 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6598180449 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5627424753 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7654936904 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2400384539 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.4846098061 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.331169359 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.541145091 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.1934774812 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.4555822394 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1515704996 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4288811212 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3049498802 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.531726813 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2572361601 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5465796366 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2245970544 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.4773636644 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.456500631 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6519350009 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0508716923 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2061725545 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.001678581 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.0842472305 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.1811700298 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.4450724584 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1152635411 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.3751269086 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1042620188 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.353512414 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.1697643488 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4858315893 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3100386494 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5747433617 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3858833658 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.5991711103 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1533855474 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4227840042 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1131265551 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3711711494 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2089476707 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4868357652 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.2953914361 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5360583303 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.1907075731 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.4662972265 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.3386484563 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.6376664219 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0974819198 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.3736857308 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.170722725 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4039469282 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1538751748 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.3921570735 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.1222763549 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.3651682861 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2389788634 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4950691973 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3197810714 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5649240218 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2240038475 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4860646744 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2284065848 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.3753787999 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1867349669 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.4122967846 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.20591358 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5231507594 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1757000759 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4569938635 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.2608919204 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.495117819 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.1391396286 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.3419293202 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.11776525 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3296737913 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1659549387 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4341818109 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1728970527 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4006604704 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1418786679 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4096096806 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.2767863837 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.343465352 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.0557337494 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2661879916 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.010496354 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2072817599 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2919442529 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.558994569 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2232843577 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.4785649547 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.1885764001 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.4905954379 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1518354017 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4184448049 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2031343023 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.4809424331 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1531227243 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.486171029 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2649114053 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.5258722646 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1638513843 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.3671674679 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1755855974 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.464470709 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2053629902 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4466569291 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.2391713081 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5284921106 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3851770392 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6778949951 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1921402736 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.4286234239 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.226941594 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4545167964 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.2044174225 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.4393220695 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.204358035 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.4838815717 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.222496921 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.455228974 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2971608126 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5809601739 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0616783152 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2486430016 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.1096684518 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4236658223 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0470010342 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.3053087334 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0354609608 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.3000471846 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1342178934 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.4186845018 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0959778877 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4062370429 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3790281875 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.5948460259 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.4122107278 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5709045042 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1956391774 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.4696282098 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3089097764 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5592753275 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2493437671 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.4876335319 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3816451478 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6189446172 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2159926241 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.485645425 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.4231210461 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.611328256 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2106935755 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4916756186 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.2957139688 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5505026606 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0983614688 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.3467631983 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.2084502331 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5081363979 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.183239364 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.4023445581 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.188899922 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.3987821089 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.1067913788 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.3496287521 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.1841725143 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.3632256251 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0529920463 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.282085967 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1362552545 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4625012714 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.1467149035 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.3956649623 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.1685063005 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4680460244 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.1938115187 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4876215653 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.301648159 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5098794037 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1547225512 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3915293941 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1695373764 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4747320433 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2691126673 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.4857803464 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3512121942 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6095777745 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.1987953868 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.4232825095 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.2845246017 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.5836686109 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.1209729479 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.3863152501 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.2257337081 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.4945472603 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3081208582 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5470122853 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.2720935434 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.4603538628 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1562871243 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.4076252967 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.1559524999 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.4140020888 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.1699563701 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4279668426 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3008412738 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4707696326 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0460275677 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.2471496791 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0301094125 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0934926984 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.1772339365 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.4203181275 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3130066985 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5687455638 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2094411351 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4706103434 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3725710921 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5552868727 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0334143542 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1507136538 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0016606076 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.0502679049 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.1864708336 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4670816214 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2648238029 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4478960511 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.1939396294 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.4361718347 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1189904742 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4347992199 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2392083536 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5109371286 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.3593480951 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5929525126 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.0586128965 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2347632724 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0021532802 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1133302543 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.160147676 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.391740055 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0779637528 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1486256305 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.1080830211 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.337342999 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0541477061 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.3900223164 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0799412014 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2678562615 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0384415516 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2172940187 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1368607253 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.4397284879 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1909241711 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2765267822 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1574414981 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4616304665 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2468189144 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3136635386 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1806603372 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.4477026286 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.1983500358 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.4823277126 }, { "model":"openai\/gpt-4.1-nano", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"translation_from", "metric":"bleu", "score":0.2046887048 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"translation_from", "metric":"chrf", "score":0.3905043974 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"translation_to", "metric":"bleu", "score":0.2440190587 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"aeb", "task":"translation_to", "metric":"chrf", "score":0.4467530618 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"af", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.1088055906 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.2952376966 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.04860361 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.2749922921 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.080495827 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.287512266 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0649609212 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.203944936 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.2086617902 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4774317011 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2730334942 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.5458981435 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.2530052174 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.5158812138 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.3439536667 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.5691908832 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1377297001 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.4304104417 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1906837255 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3931621016 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1573943285 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.4039837102 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.2385684611 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.4690487202 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.1786795263 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.4382834543 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.142030089 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.3706217658 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2061194828 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.38382712 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.2351754729 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.4412955741 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.1025818924 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.355623252 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.1290294373 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3807908275 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"translation_from", "metric":"bleu", "score":0.116157646 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"translation_from", "metric":"chrf", "score":0.4411553165 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"translation_to", "metric":"bleu", "score":0.2284052455 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"be", "task":"translation_to", "metric":"chrf", "score":0.4432025312 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1573424376 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.3813908093 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1860567167 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3798747224 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bm", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.2047894665 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.4476643899 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.3413387194 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.5056140066 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ca", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.3321604587 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.524735789 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.399945485 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.6275070378 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"translation_from", "metric":"bleu", "score":0.0862560502 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"translation_from", "metric":"chrf", "score":0.2788047314 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"translation_to", "metric":"bleu", "score":0.0359802782 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ckb", "task":"translation_to", "metric":"chrf", "score":0.2225612749 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.2777777551 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.5317009045 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2755276023 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.4907555325 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.2840890109 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.5146969249 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.3999539422 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.6267391818 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.2595428958 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.4813680319 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.3306804036 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.4976939797 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.5232930808 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.6688775695 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6469796865 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.8203785308 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2793939864 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.5176409834 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.4118937163 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.6353341411 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.2052699799 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.4764669046 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.2131911377 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.4147480093 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.3062563146 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.4925975136 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.32039199 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.5717901387 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.2706688563 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.5148499232 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4808374237 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6855290209 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0270875349 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.2100353402 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.052858761 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1950018354 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.2245042279 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.4426786034 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.1920269509 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.4643025206 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.1154893286 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.3792147754 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.2191612695 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.4879764503 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.3473235908 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.5515454754 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.3991894826 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.6121310121 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.247888062 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.4353918541 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1626119723 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.4423709529 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ht", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.2640028594 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.524505973 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.3929863672 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.5880857849 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.2522725561 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.5212732474 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.2850030055 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.5970450995 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.1326727529 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.3646478687 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.1882093096 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.4009607044 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"translation_from", "metric":"bleu", "score":0.1988516559 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"translation_from", "metric":"chrf", "score":0.405478436 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"translation_to", "metric":"bleu", "score":0.0961457593 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ilo", "task":"translation_to", "metric":"chrf", "score":0.4060794313 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.2746808629 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.5180176469 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.3112912727 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5712680542 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.2363319461 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.4826308954 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.2593036542 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.4231415642 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.2480055389 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.4685108662 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.2241033812 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.5113817494 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"translation_from", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ki", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.147911394 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.3985376686 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.290182238 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.5572310551 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.2772807862 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.504897576 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.1404234583 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.3189837953 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1849035655 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.430576325 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.2444722013 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.4742295195 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.2013601575 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.4475607863 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.1980395856 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2888993735 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"translation_from", "metric":"bleu", "score":0.042643493 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"translation_from", "metric":"chrf", "score":0.2339244707 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"translation_to", "metric":"bleu", "score":0.0143208425 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"lua", "task":"translation_to", "metric":"chrf", "score":0.2382431413 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2373436047 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.4564427975 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.2754056305 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.5123611693 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.2027297928 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.4495211176 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1639594712 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.4482904829 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.2035781185 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.4598803974 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.1964079195 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.5179064416 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2246064108 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.4311975246 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.2434125045 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.4971145063 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1756463826 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.3823527701 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.2340922946 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.4581322597 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.2993296846 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.5743132494 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3504238332 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6154153931 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1741885177 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.449774491 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.2106778 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.4610458467 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.2483418024 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.4432537254 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.2305902219 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.5017217229 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2523126947 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4772912105 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.3639443469 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.6252850371 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0835095719 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.2797853634 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0779315192 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.4121236337 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0511068522 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.2702934215 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0724907554 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.345324531 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1164462601 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.3729006132 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.1379248705 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.4079943111 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3640514137 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.5847142015 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.3988016179 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.5745254523 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.2319955399 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.4971130964 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.293769398 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.535772663 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.3014354397 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.5315937202 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.4535066637 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.6773057972 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"qu", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.2532461677 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.5201960699 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.5052082065 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.6686611337 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.2270580453 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.5034759488 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3258505825 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.5592402358 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"translation_from", "metric":"bleu", "score":0.0829790682 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"translation_from", "metric":"chrf", "score":0.337986391 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"translation_to", "metric":"bleu", "score":0.2129352292 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"rw", "task":"translation_to", "metric":"chrf", "score":0.5084793087 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.15024418 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.3597265355 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.2369214411 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.4711257499 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0980707024 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.3109100287 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.1934430032 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.3560526886 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0582100604 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.3075785834 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.1011833785 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.4367282377 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.1531795055 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.3616443224 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.2049307012 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.4719724156 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.2199024767 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4907562634 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.3830980295 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.5736359642 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.2379030124 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.4403417868 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.2238060743 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.5243303769 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.2852268785 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.5304479976 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3829618265 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.6326982198 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.4 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.222064455 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.4652246692 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.299635051 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.5860066036 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.1407382127 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.3831149186 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.214481784 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.4692538776 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3646122831 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.5746253001 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.3003064302 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.5444122929 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"classification", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"translation_from", "metric":"bleu", "score":0.1128962774 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"translation_from", "metric":"chrf", "score":0.3493465213 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"translation_to", "metric":"bleu", "score":0.1841235337 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tg", "task":"translation_to", "metric":"chrf", "score":0.417497165 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.2392194968 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4936638572 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.3622208845 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.5010514821 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"translation_from", "metric":"bleu", "score":0.0408372058 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"translation_from", "metric":"chrf", "score":0.2042987422 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"translation_to", "metric":"bleu", "score":0.0231278614 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ti", "task":"translation_to", "metric":"chrf", "score":0.0849541719 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2621174982 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.5019200442 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.3816990204 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.5999237379 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.2245951815 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.4480996711 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3852002404 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.5711778517 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"classification", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"translation_from", "metric":"bleu", "score":0.0415789397 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"translation_from", "metric":"chrf", "score":0.1350551103 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"translation_to", "metric":"bleu", "score":0.0010517421 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"umb", "task":"translation_to", "metric":"chrf", "score":0.0968599255 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.2436356521 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.4877029713 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2603784132 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4734427307 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.1411472616 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.3855156193 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.2021458884 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.4930438511 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.1995232614 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.4582270744 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.2597310259 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.514972808 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"translation_from", "metric":"bleu", "score":0.08218909 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"translation_from", "metric":"chrf", "score":0.2632475474 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"translation_to", "metric":"bleu", "score":0.0383287658 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wo", "task":"translation_to", "metric":"chrf", "score":0.1924695915 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.159437398 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.373213248 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1199632327 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1706758411 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"translation_from", "metric":"bleu", "score":0.158569201 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"translation_from", "metric":"chrf", "score":0.3926886149 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"translation_to", "metric":"bleu", "score":0.0591321886 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"xh", "task":"translation_to", "metric":"chrf", "score":0.3427023375 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.079919346 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.279919938 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0720231313 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.2187010976 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.2 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.8 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.6 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1290283283 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.3797298683 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1636055441 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2449501177 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.7 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1773725218 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.458620733 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2753136513 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.3274827604 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":1.0 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.9 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.5 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.1866603918 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.4108538087 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.164847197 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.4524644478 }, { "model":"openai\/gpt-4o-mini", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.055487033 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.195788708 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.1124541522 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0818660054 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.2443718379 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0312578478 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0892192454 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.1879703279 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.4082307283 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.2032527408 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.4252508109 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.1511448079 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.3482345089 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.2660157525 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.4467729024 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0827758372 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.2694627987 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1091045232 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3272460856 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1266156847 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.3088203065 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1843867072 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.3542134538 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.1057754271 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.2879318321 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0399332596 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.1436097331 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2082859775 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.3721700071 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.1319221867 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3256562506 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0834540968 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.2549210714 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.0664704876 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.2928996174 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1339563491 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.2272649787 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.1668638517 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.3029135903 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.1789816822 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.3618057577 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.1498239832 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.2860628236 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.2518844765 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.4578479284 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.1735279991 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.3732578771 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.1571790351 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.3555264704 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2188910218 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.3725672408 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.1899770324 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.3001055745 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.3043234734 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.4952039495 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.1310803496 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.2985898996 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.1645861291 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.2473851762 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.1962899792 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.316041879 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.5755388881 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.7200533933 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.1318044964 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.3364780931 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.2564170555 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.4382398152 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.1533507904 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.3596802703 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1539918473 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3118556242 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.2008801988 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.3714765993 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.1940537936 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.4792247969 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.1824127171 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.3884840193 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.3876504315 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.5389108131 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.0738447366 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0240639493 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.1911668884 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.1949542031 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.4052894246 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.0646513332 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.1693257519 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0392475471 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.126618872 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0445527444 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.1499587951 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.2268010617 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.3688527647 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.2230471235 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.4142622149 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1156575532 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.2236151918 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0698561479 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.2305830236 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.1691753276 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.3383017469 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.1871269984 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.3731318657 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.1585058297 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.3050841055 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.1943793424 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.4973073268 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0963109471 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.2727236688 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0660035863 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.158376548 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.1542082331 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.3028124272 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2460472209 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5145531621 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1612360434 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.3349199354 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.117016066 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.208990655 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1729476776 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.3959364431 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.1785425051 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.4249603279 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1793896966 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.4090639994 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.1405729124 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.3820186042 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0698832994 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.2284330377 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0851826028 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.1882485322 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1743539627 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.4083936939 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1166173259 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.2305285039 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1358227204 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.3423557444 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.1953069902 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2564265013 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2314152421 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.4725672887 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.17835674 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.3229842432 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.1533461204 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.3472008961 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.1168236528 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.2271783619 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0928957375 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.244366675 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0070995906 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.1262437392 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.1971269045 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.3735961781 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1323978127 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.1893359682 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.2135948303 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.3958565999 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.1503233282 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.275821329 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.1307137096 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.2920741112 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3539116395 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.5832656935 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1255424452 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.329335139 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0516414641 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.1567420369 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.2023089106 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.3806028698 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.0980312706 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.264649599 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.1993799127 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.4072508102 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.213081855 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.4114277627 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0393547699 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.163604057 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0139872791 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.1446751186 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0330655518 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.1646544216 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0093749808 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.1074800017 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.223399015 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.4518680174 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0545741621 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.1291918248 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.1485025023 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.3380088662 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.1374014148 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.2051503897 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1586103513 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.302194795 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.3897966488 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.5766535228 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.137323181 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.3095327986 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.2669467187 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.3969322178 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.1792721327 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.4176277039 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.3094152813 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.4953887976 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.1584004696 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.38671906 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.3118021035 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.4665016839 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.1090127159 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.2748895651 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.1361757276 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.3342529311 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.1417180597 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.3566626666 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0411134672 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.1347885554 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0397889862 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.1881103773 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0165581412 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.1414877053 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0815647227 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.2487364334 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0253075503 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.2274689496 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.1882164689 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.3943730373 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.2381466467 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.4346543613 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.15780181 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3437437662 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.1075072996 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.4294170504 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.1975945861 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.374398795 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.345880422 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.5436350308 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.1913501957 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.3785864037 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.1219661246 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.3307175909 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.1483607311 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.3430470513 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.1257952581 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.2720024162 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.378185741 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.557354327 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.1301924452 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.2509083971 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":1.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.147695853 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.345415746 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.1876537733 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.3005344914 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2078229702 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.3841741235 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.147060653 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.3761003189 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.1363197738 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.2845774158 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3118632296 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.4917979058 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.1623147303 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.3145943461 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.2226311966 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.4414805706 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.0849999362 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.2406274728 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.279180562 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.5116111495 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.1911759573 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.3812712354 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.2038650525 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.4035183237 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.1435587328 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.3490715453 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.1113617435 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.171370503 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0366725514 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.2007464145 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0113521992 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.0657310926 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1121528943 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.3178178007 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1508604775 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2175191576 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1039910991 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.2728656752 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2326991429 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.2529625335 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.121119706 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.2540648952 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0235611585 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.1930902578 }, { "model":"qwen\/qwen3-235b-a22b", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0050031284 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.0924942363 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0116797169 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.0585141671 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.0764700894 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.2647322624 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.0882851827 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.2833576827 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.106692739 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.295973969 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.0607816225 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.1277246917 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.0362905586 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.1081740165 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1172745059 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.2687874448 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.0438294169 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.108019816 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.0673513704 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.2079164994 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.0514848851 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.1736145704 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0238993213 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.1411125068 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.1396565072 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.2428031494 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.0383901491 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.1005302975 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0278489988 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.1190175818 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.0367390088 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.0930542371 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.0736017029 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.1960588462 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.0585115493 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.1332846728 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.0429848247 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.2044845821 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.0299885561 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.0829092043 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.1804319747 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.3101350547 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.0701534813 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.1430960661 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.1415195376 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.288893664 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.1392279949 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.2551496147 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.1564210937 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.3072042217 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.2457069766 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.3367028296 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.0578542202 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.1674077812 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.0663537525 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.1816240149 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.2474773351 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.3070994171 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.4019192682 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.4782905978 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.1341566102 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.2765542043 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.1161792768 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.2919898174 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.0817283606 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.241409878 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.0476911924 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.1464645136 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.1279159996 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.2829362826 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.0388514243 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.1409030042 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.134253193 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.2989125898 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.2077048384 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.2946343811 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0181579676 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.0543819937 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0084320884 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.0454173788 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.0897072533 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.200957971 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.0377543414 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0111815534 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.1020769257 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.021314569 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.0830626431 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.1220519036 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.2639335507 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.0973415259 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.2016493248 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.0349119748 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.1556602705 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.0494255017 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.1669634575 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.1211482424 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.2505103528 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.164473668 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.3043231306 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.0833080649 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.2451270085 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.1631993738 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.3566219474 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0073674163 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.0518830276 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0292915569 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.0528593127 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.1046004559 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.2507392983 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.1255584711 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.2268219009 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.0941710304 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.2704334257 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.0992512617 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.1515216003 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1016751568 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.2254142889 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.1238616028 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.3669735224 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.0534454977 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.193605134 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.0265149039 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.1928985788 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.0889265704 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.2173396783 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.0392373629 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.0711012835 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.1937626852 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.0417334285 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.2140485243 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.0725025436 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.07710948 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.0771251025 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.2350655007 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.1021996092 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.2179221866 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.0444196112 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.2032069734 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0626942847 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.1962655319 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0065978992 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.0423907689 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0220679536 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.0589969179 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.1351914125 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.2585829871 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.0901612807 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.1634932067 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.0940283278 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.2412937356 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.1354857092 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.0831447868 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.2593808275 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.2740792798 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.4472159864 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.0693332571 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.1429077445 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.0671076397 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.2039597866 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.0167754523 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.1541036377 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.1500475481 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.2732969464 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2819860484 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.4877091208 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0158298608 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.116087277 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.0604762339 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.0477354473 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.0430498724 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.0224903847 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.1205338978 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0083749469 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.0240423066 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.1058521796 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.2379060391 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.0340105109 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.1018981548 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1200562696 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.2833586847 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.0585253067 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.226893054 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.0734185487 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.2234046866 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.2994720627 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.4412841692 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.0737479957 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.2334413367 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.2671240661 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.4582064143 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.0655622212 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.2295301444 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.1421377727 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.3364832122 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.0729593007 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.1686728011 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.0254323861 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0173772616 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.094294748 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0249272374 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.1318435849 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0114625376 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.0309494652 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.018362811 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.0903053603 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.0361908088 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.0697403266 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.2326685525 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.0901351217 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.1761295618 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.0358186402 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.1577931474 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0657495832 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.2377800817 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.1439757229 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.3280954777 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.1315707916 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.2532056747 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.0822395206 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.2293817888 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.0711409402 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.2295116951 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.0281826938 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.0907703349 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.2172508025 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.0867988442 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.2789247181 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.0817852216 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.1305309896 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.134883333 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.2942401793 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.1382385998 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.250580016 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.0975469561 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.2592386604 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.11477212 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.2025044003 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.1035672471 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.2255615453 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.1161482705 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.2453264465 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.069654902 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.2336481279 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.0972829087 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.2111334793 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.0562684736 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.2112928198 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.039901967 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.2071350414 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.092781215 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.2596321396 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0743041275 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1172524094 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.0559568244 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.0188433826 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.0807367939 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.2250049533 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.1178746954 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.1300639553 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.1028766672 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.2670830369 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.1623451886 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.2013735709 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.0409146661 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.1675889915 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.0850348967 }, { "model":"qwen\/qwen3-30b-a3b", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"translation_from", "metric":"bleu", "score":0.0197191428 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"translation_from", "metric":"chrf", "score":0.1664518353 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"translation_to", "metric":"bleu", "score":0.022953237 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ak", "task":"translation_to", "metric":"chrf", "score":0.1745004402 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"mgsm", "metric":"accuracy", "score":0.2 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"translation_from", "metric":"bleu", "score":0.0796218409 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"translation_from", "metric":"chrf", "score":0.2147354921 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"translation_to", "metric":"bleu", "score":0.010900097 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"translation_to", "metric":"chrf", "score":0.0657137696 }, { "model":"qwen\/qwen3-32b", "bcp_47":"am", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"translation_from", "metric":"bleu", "score":0.132513614 }, { "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"translation_from", "metric":"chrf", "score":0.3795883854 }, { "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"translation_to", "metric":"bleu", "score":0.137952669 }, { "model":"qwen\/qwen3-32b", "bcp_47":"apc", "task":"translation_to", "metric":"chrf", "score":0.3264835371 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"translation_from", "metric":"bleu", "score":0.1656993834 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"translation_from", "metric":"chrf", "score":0.3867250082 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"translation_to", "metric":"bleu", "score":0.2206826239 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ar", "task":"translation_to", "metric":"chrf", "score":0.4063080067 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"translation_from", "metric":"bleu", "score":0.1079587982 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"translation_from", "metric":"chrf", "score":0.3785381059 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"translation_to", "metric":"bleu", "score":0.1218401593 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ary", "task":"translation_to", "metric":"chrf", "score":0.3257594737 }, { "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"translation_from", "metric":"bleu", "score":0.1042685601 }, { "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"translation_from", "metric":"chrf", "score":0.3470585369 }, { "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"translation_to", "metric":"bleu", "score":0.1401256855 }, { "model":"qwen\/qwen3-32b", "bcp_47":"arz", "task":"translation_to", "metric":"chrf", "score":0.3746457154 }, { "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"translation_from", "metric":"bleu", "score":0.12200123 }, { "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"translation_from", "metric":"chrf", "score":0.3327938776 }, { "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"translation_to", "metric":"bleu", "score":0.0166926581 }, { "model":"qwen\/qwen3-32b", "bcp_47":"as", "task":"translation_to", "metric":"chrf", "score":0.0857862708 }, { "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"translation_from", "metric":"bleu", "score":0.2393773898 }, { "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"translation_from", "metric":"chrf", "score":0.480146856 }, { "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"translation_to", "metric":"bleu", "score":0.13553124 }, { "model":"qwen\/qwen3-32b", "bcp_47":"awa", "task":"translation_to", "metric":"chrf", "score":0.3086397875 }, { "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"translation_from", "metric":"bleu", "score":0.0984273348 }, { "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"translation_from", "metric":"chrf", "score":0.2705806557 }, { "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"translation_to", "metric":"bleu", "score":0.0899956365 }, { "model":"qwen\/qwen3-32b", "bcp_47":"az", "task":"translation_to", "metric":"chrf", "score":0.3292198004 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"translation_from", "metric":"bleu", "score":0.1944585572 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"translation_from", "metric":"chrf", "score":0.4135649539 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"translation_to", "metric":"bleu", "score":0.0841754475 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bho", "task":"translation_to", "metric":"chrf", "score":0.2832267135 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"translation_from", "metric":"bleu", "score":0.1905773039 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"translation_from", "metric":"chrf", "score":0.3974640862 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"translation_to", "metric":"bleu", "score":0.1738721227 }, { "model":"qwen\/qwen3-32b", "bcp_47":"bn", "task":"translation_to", "metric":"chrf", "score":0.3341823126 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"translation_from", "metric":"bleu", "score":0.2913417198 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"translation_from", "metric":"chrf", "score":0.4715886747 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"translation_to", "metric":"bleu", "score":0.2042487615 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ceb", "task":"translation_to", "metric":"chrf", "score":0.3926894761 }, { "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"translation_from", "metric":"bleu", "score":0.1724601448 }, { "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"translation_from", "metric":"chrf", "score":0.3857736694 }, { "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"translation_to", "metric":"bleu", "score":0.2045983077 }, { "model":"qwen\/qwen3-32b", "bcp_47":"cs", "task":"translation_to", "metric":"chrf", "score":0.4057824257 }, { "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"translation_from", "metric":"bleu", "score":0.2174481184 }, { "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"translation_from", "metric":"chrf", "score":0.4124810034 }, { "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"translation_to", "metric":"bleu", "score":0.3022742815 }, { "model":"qwen\/qwen3-32b", "bcp_47":"de", "task":"translation_to", "metric":"chrf", "score":0.4968168009 }, { "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"translation_from", "metric":"bleu", "score":0.1714951139 }, { "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"translation_from", "metric":"chrf", "score":0.3572714199 }, { "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"translation_to", "metric":"bleu", "score":0.1874209861 }, { "model":"qwen\/qwen3-32b", "bcp_47":"el", "task":"translation_to", "metric":"chrf", "score":0.3148917242 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"translation_from", "metric":"bleu", "score":0.446850518 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"translation_from", "metric":"chrf", "score":0.5288962517 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"translation_to", "metric":"bleu", "score":0.6607457062 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"translation_to", "metric":"chrf", "score":0.828511917 }, { "model":"qwen\/qwen3-32b", "bcp_47":"en", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"translation_from", "metric":"bleu", "score":0.2056231855 }, { "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"translation_from", "metric":"chrf", "score":0.4306632094 }, { "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"translation_to", "metric":"bleu", "score":0.279042145 }, { "model":"qwen\/qwen3-32b", "bcp_47":"es", "task":"translation_to", "metric":"chrf", "score":0.5148983586 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"translation_from", "metric":"bleu", "score":0.0657228626 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"translation_from", "metric":"chrf", "score":0.3139715852 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"translation_to", "metric":"bleu", "score":0.1461056975 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fa", "task":"translation_to", "metric":"chrf", "score":0.3475439511 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"translation_from", "metric":"bleu", "score":0.2438433878 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"translation_from", "metric":"chrf", "score":0.4512223379 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"translation_to", "metric":"bleu", "score":0.2208119792 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fil", "task":"translation_to", "metric":"chrf", "score":0.4378335772 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"translation_from", "metric":"bleu", "score":0.1721857235 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"translation_from", "metric":"chrf", "score":0.477225501 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"translation_to", "metric":"bleu", "score":0.4037533819 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fr", "task":"translation_to", "metric":"chrf", "score":0.6067640163 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"translation_from", "metric":"bleu", "score":0.0224194954 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"translation_from", "metric":"chrf", "score":0.105046056 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"fuv", "task":"translation_to", "metric":"chrf", "score":0.082276319 }, { "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"translation_from", "metric":"bleu", "score":0.1698272846 }, { "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"translation_from", "metric":"chrf", "score":0.3349276506 }, { "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"translation_to", "metric":"bleu", "score":0.0740380781 }, { "model":"qwen\/qwen3-32b", "bcp_47":"gu", "task":"translation_to", "metric":"chrf", "score":0.2296558189 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"translation_from", "metric":"bleu", "score":0.0341558033 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"translation_from", "metric":"chrf", "score":0.1515090956 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"translation_to", "metric":"bleu", "score":0.0178215481 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"translation_to", "metric":"chrf", "score":0.206138289 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ha", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"translation_from", "metric":"bleu", "score":0.220218347 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"translation_from", "metric":"chrf", "score":0.4387911559 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"translation_to", "metric":"bleu", "score":0.2024184343 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hi", "task":"translation_to", "metric":"chrf", "score":0.4252839653 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"translation_from", "metric":"bleu", "score":0.1375555656 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"translation_from", "metric":"chrf", "score":0.3016838615 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"translation_to", "metric":"bleu", "score":0.1141821718 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hne", "task":"translation_to", "metric":"chrf", "score":0.3531692508 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"translation_from", "metric":"bleu", "score":0.201485104 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"translation_from", "metric":"chrf", "score":0.4060774974 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"translation_to", "metric":"bleu", "score":0.1845697152 }, { "model":"qwen\/qwen3-32b", "bcp_47":"hu", "task":"translation_to", "metric":"chrf", "score":0.3651920542 }, { "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"translation_from", "metric":"bleu", "score":0.2119242961 }, { "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"translation_from", "metric":"chrf", "score":0.4027870816 }, { "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"translation_to", "metric":"bleu", "score":0.2845893115 }, { "model":"qwen\/qwen3-32b", "bcp_47":"id", "task":"translation_to", "metric":"chrf", "score":0.5618854988 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"translation_from", "metric":"bleu", "score":0.0631617801 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"translation_from", "metric":"chrf", "score":0.2207616259 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"translation_to", "metric":"bleu", "score":0.0508365473 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ig", "task":"translation_to", "metric":"chrf", "score":0.1697158135 }, { "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"translation_from", "metric":"bleu", "score":0.1840631549 }, { "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"translation_from", "metric":"chrf", "score":0.4301865089 }, { "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"translation_to", "metric":"bleu", "score":0.2482222138 }, { "model":"qwen\/qwen3-32b", "bcp_47":"it", "task":"translation_to", "metric":"chrf", "score":0.5228204728 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"translation_from", "metric":"bleu", "score":0.1127930596 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"translation_from", "metric":"chrf", "score":0.2787849105 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"translation_to", "metric":"bleu", "score":0.1840671906 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ja", "task":"translation_to", "metric":"chrf", "score":0.3135227124 }, { "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"translation_from", "metric":"bleu", "score":0.1834764341 }, { "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"translation_from", "metric":"chrf", "score":0.3297107768 }, { "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"translation_to", "metric":"bleu", "score":0.1569316995 }, { "model":"qwen\/qwen3-32b", "bcp_47":"jv", "task":"translation_to", "metric":"chrf", "score":0.467165329 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"translation_from", "metric":"bleu", "score":0.1096372066 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"translation_from", "metric":"chrf", "score":0.2865411962 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"translation_to", "metric":"bleu", "score":0.1388075288 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kk", "task":"translation_to", "metric":"chrf", "score":0.3285307881 }, { "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"translation_from", "metric":"bleu", "score":0.170745871 }, { "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"translation_from", "metric":"chrf", "score":0.3490488807 }, { "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"translation_to", "metric":"bleu", "score":0.0792740607 }, { "model":"qwen\/qwen3-32b", "bcp_47":"km", "task":"translation_to", "metric":"chrf", "score":0.2285805687 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"translation_from", "metric":"bleu", "score":0.1679784179 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"translation_from", "metric":"chrf", "score":0.3620246212 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"translation_to", "metric":"bleu", "score":0.1242897501 }, { "model":"qwen\/qwen3-32b", "bcp_47":"kn", "task":"translation_to", "metric":"chrf", "score":0.3007681742 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"translation_from", "metric":"bleu", "score":0.1276543618 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"translation_from", "metric":"chrf", "score":0.3677051571 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"translation_to", "metric":"bleu", "score":0.1980497946 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ko", "task":"translation_to", "metric":"chrf", "score":0.2107568779 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"translation_from", "metric":"bleu", "score":0.2450449733 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"translation_from", "metric":"chrf", "score":0.457594122 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"translation_to", "metric":"bleu", "score":0.198485011 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mag", "task":"translation_to", "metric":"chrf", "score":0.3848787397 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"translation_from", "metric":"bleu", "score":0.1927814544 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"translation_from", "metric":"chrf", "score":0.3935457095 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"translation_to", "metric":"bleu", "score":0.0565208468 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mai", "task":"translation_to", "metric":"chrf", "score":0.3131063701 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"translation_from", "metric":"bleu", "score":0.0354662811 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"translation_from", "metric":"chrf", "score":0.1686638218 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"translation_to", "metric":"bleu", "score":0.0314143451 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mg", "task":"translation_to", "metric":"chrf", "score":0.2728599885 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"translation_from", "metric":"bleu", "score":0.2371717296 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"translation_from", "metric":"chrf", "score":0.4148173757 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"translation_to", "metric":"bleu", "score":0.1193636287 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ml", "task":"translation_to", "metric":"chrf", "score":0.2090448587 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"translation_from", "metric":"bleu", "score":0.1181276928 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"translation_from", "metric":"chrf", "score":0.3044523516 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"translation_to", "metric":"bleu", "score":0.0922160441 }, { "model":"qwen\/qwen3-32b", "bcp_47":"mr", "task":"translation_to", "metric":"chrf", "score":0.2284498534 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"translation_from", "metric":"bleu", "score":0.1968422851 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"translation_from", "metric":"chrf", "score":0.4053316305 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"translation_to", "metric":"bleu", "score":0.3766906478 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ms", "task":"translation_to", "metric":"chrf", "score":0.6371848492 }, { "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"translation_from", "metric":"bleu", "score":0.1225410694 }, { "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"translation_from", "metric":"chrf", "score":0.3037176244 }, { "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"translation_to", "metric":"bleu", "score":0.1060809306 }, { "model":"qwen\/qwen3-32b", "bcp_47":"my", "task":"translation_to", "metric":"chrf", "score":0.2215712232 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"translation_from", "metric":"bleu", "score":0.1375199333 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"translation_from", "metric":"chrf", "score":0.3471041134 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"translation_to", "metric":"bleu", "score":0.1115971998 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ne", "task":"translation_to", "metric":"chrf", "score":0.2915076183 }, { "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"translation_from", "metric":"bleu", "score":0.2385777935 }, { "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"translation_from", "metric":"chrf", "score":0.46025989 }, { "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"translation_to", "metric":"bleu", "score":0.2726117583 }, { "model":"qwen\/qwen3-32b", "bcp_47":"nl", "task":"translation_to", "metric":"chrf", "score":0.5059991136 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"translation_from", "metric":"bleu", "score":0.0351245421 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"translation_from", "metric":"chrf", "score":0.1823407405 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"translation_to", "metric":"bleu", "score":0.0228348515 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ny", "task":"translation_to", "metric":"chrf", "score":0.2201854752 }, { "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"translation_from", "metric":"bleu", "score":0.008627568 }, { "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"translation_from", "metric":"chrf", "score":0.190294404 }, { "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"translation_to", "metric":"bleu", "score":0.0084651752 }, { "model":"qwen\/qwen3-32b", "bcp_47":"om", "task":"translation_to", "metric":"chrf", "score":0.1971638266 }, { "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"translation_from", "metric":"bleu", "score":0.1927446862 }, { "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"translation_from", "metric":"chrf", "score":0.3892188652 }, { "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"translation_to", "metric":"bleu", "score":0.0747658241 }, { "model":"qwen\/qwen3-32b", "bcp_47":"or", "task":"translation_to", "metric":"chrf", "score":0.2195369005 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"translation_from", "metric":"bleu", "score":0.3258973448 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"translation_from", "metric":"chrf", "score":0.4626835685 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"translation_to", "metric":"bleu", "score":0.2107798391 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pa", "task":"translation_to", "metric":"chrf", "score":0.3008568297 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"translation_from", "metric":"bleu", "score":0.1854293513 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"translation_from", "metric":"chrf", "score":0.4307605073 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"translation_to", "metric":"bleu", "score":0.2420005385 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pl", "task":"translation_to", "metric":"chrf", "score":0.4288325052 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ps", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ps", "task":"mgsm", "metric":"accuracy", "score":0.5 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ps", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"translation_from", "metric":"bleu", "score":0.2177730164 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"translation_from", "metric":"chrf", "score":0.433438268 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"translation_to", "metric":"bleu", "score":0.3402518575 }, { "model":"qwen\/qwen3-32b", "bcp_47":"pt", "task":"translation_to", "metric":"chrf", "score":0.4997252818 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"translation_from", "metric":"bleu", "score":0.1593298949 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"translation_from", "metric":"chrf", "score":0.4017223467 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"translation_to", "metric":"bleu", "score":0.2726504789 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ro", "task":"translation_to", "metric":"chrf", "score":0.4406178765 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"translation_from", "metric":"bleu", "score":0.1869622361 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"translation_from", "metric":"chrf", "score":0.4301337345 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"translation_to", "metric":"bleu", "score":0.2546688585 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ru", "task":"translation_to", "metric":"chrf", "score":0.3959108821 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"translation_from", "metric":"bleu", "score":0.1610657464 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"translation_from", "metric":"chrf", "score":0.3043802738 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"translation_to", "metric":"bleu", "score":0.0315314884 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sd", "task":"translation_to", "metric":"chrf", "score":0.2120098132 }, { "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"translation_from", "metric":"bleu", "score":0.0130905001 }, { "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"translation_from", "metric":"chrf", "score":0.1973781543 }, { "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"translation_to", "metric":"bleu", "score":0.0134555536 }, { "model":"qwen\/qwen3-32b", "bcp_47":"si", "task":"translation_to", "metric":"chrf", "score":0.1176319627 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"translation_from", "metric":"bleu", "score":0.0330243636 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"translation_from", "metric":"chrf", "score":0.1963889628 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"translation_to", "metric":"bleu", "score":0.0428214603 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sn", "task":"translation_to", "metric":"chrf", "score":0.2352543457 }, { "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"mgsm", "metric":"accuracy", "score":0.3 }, { "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"translation_from", "metric":"bleu", "score":0.0329294407 }, { "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"translation_from", "metric":"chrf", "score":0.1742241015 }, { "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"translation_to", "metric":"bleu", "score":0.0243956065 }, { "model":"qwen\/qwen3-32b", "bcp_47":"so", "task":"translation_to", "metric":"chrf", "score":0.1607978429 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"translation_from", "metric":"bleu", "score":0.1604574347 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"translation_from", "metric":"chrf", "score":0.4084047683 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"translation_to", "metric":"bleu", "score":0.1664454505 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sr", "task":"translation_to", "metric":"chrf", "score":0.34203002 }, { "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"translation_from", "metric":"bleu", "score":0.1257406217 }, { "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"translation_from", "metric":"chrf", "score":0.3378715267 }, { "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"translation_to", "metric":"bleu", "score":0.0667296519 }, { "model":"qwen\/qwen3-32b", "bcp_47":"su", "task":"translation_to", "metric":"chrf", "score":0.3980540266 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"translation_from", "metric":"bleu", "score":0.1672894127 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"translation_from", "metric":"chrf", "score":0.3684788102 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"translation_to", "metric":"bleu", "score":0.3614873089 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sv", "task":"translation_to", "metric":"chrf", "score":0.5392220773 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"mgsm", "metric":"accuracy", "score":0.8 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"translation_from", "metric":"bleu", "score":0.1200999603 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"translation_from", "metric":"chrf", "score":0.2908221442 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"translation_to", "metric":"bleu", "score":0.023689627 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"translation_to", "metric":"chrf", "score":0.2453164021 }, { "model":"qwen\/qwen3-32b", "bcp_47":"sw", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"translation_from", "metric":"bleu", "score":0.1882602024 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"translation_from", "metric":"chrf", "score":0.4059862729 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"translation_to", "metric":"bleu", "score":0.0841932466 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ta", "task":"translation_to", "metric":"chrf", "score":0.260536888 }, { "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"translation_from", "metric":"bleu", "score":0.3422950731 }, { "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"translation_from", "metric":"chrf", "score":0.4841100904 }, { "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"translation_to", "metric":"bleu", "score":0.1765162745 }, { "model":"qwen\/qwen3-32b", "bcp_47":"te", "task":"translation_to", "metric":"chrf", "score":0.3133803312 }, { "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"translation_from", "metric":"bleu", "score":0.196772439 }, { "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"translation_from", "metric":"chrf", "score":0.4191567084 }, { "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"translation_to", "metric":"bleu", "score":0.2773920621 }, { "model":"qwen\/qwen3-32b", "bcp_47":"th", "task":"translation_to", "metric":"chrf", "score":0.4482220675 }, { "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"translation_from", "metric":"bleu", "score":0.2102634926 }, { "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"translation_from", "metric":"chrf", "score":0.4159372483 }, { "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"translation_to", "metric":"bleu", "score":0.2364464274 }, { "model":"qwen\/qwen3-32b", "bcp_47":"tr", "task":"translation_to", "metric":"chrf", "score":0.4789525721 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"translation_from", "metric":"bleu", "score":0.1579324347 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"translation_from", "metric":"chrf", "score":0.3647294785 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"translation_to", "metric":"bleu", "score":0.3006688281 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uk", "task":"translation_to", "metric":"chrf", "score":0.468364849 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"translation_from", "metric":"bleu", "score":0.1705686173 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"translation_from", "metric":"chrf", "score":0.3937623183 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"translation_to", "metric":"bleu", "score":0.1711380057 }, { "model":"qwen\/qwen3-32b", "bcp_47":"ur", "task":"translation_to", "metric":"chrf", "score":0.3502063066 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"mgsm", "metric":"accuracy", "score":0.6 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"translation_from", "metric":"bleu", "score":0.1315627205 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"translation_from", "metric":"chrf", "score":0.3801109933 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"translation_to", "metric":"bleu", "score":0.1184994967 }, { "model":"qwen\/qwen3-32b", "bcp_47":"uz", "task":"translation_to", "metric":"chrf", "score":0.3552530055 }, { "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"mgsm", "metric":"accuracy", "score":0.7 }, { "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"translation_from", "metric":"bleu", "score":0.2826147232 }, { "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"translation_from", "metric":"chrf", "score":0.5352078445 }, { "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"translation_to", "metric":"bleu", "score":0.2891587891 }, { "model":"qwen\/qwen3-32b", "bcp_47":"vi", "task":"translation_to", "metric":"chrf", "score":0.5030568081 }, { "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"translation_from", "metric":"bleu", "score":0.1923006081 }, { "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"translation_from", "metric":"chrf", "score":0.3850610484 }, { "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"translation_to", "metric":"bleu", "score":0.0704239199 }, { "model":"qwen\/qwen3-32b", "bcp_47":"wuu", "task":"translation_to", "metric":"chrf", "score":0.1309128692 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"mgsm", "metric":"accuracy", "score":0.1 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"translation_from", "metric":"bleu", "score":0.0078376559 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"translation_from", "metric":"chrf", "score":0.1634867622 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"translation_to", "metric":"bleu", "score":0.028486223 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"translation_to", "metric":"chrf", "score":0.0875657048 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yo", "task":"truthfulqa", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"translation_from", "metric":"bleu", "score":0.1257691602 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"translation_from", "metric":"chrf", "score":0.3303444225 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"translation_to", "metric":"bleu", "score":0.159764099 }, { "model":"qwen\/qwen3-32b", "bcp_47":"yue", "task":"translation_to", "metric":"chrf", "score":0.2311709663 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"mgsm", "metric":"accuracy", "score":0.4 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"translation_from", "metric":"bleu", "score":0.170228681 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"translation_from", "metric":"chrf", "score":0.4530475535 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"translation_to", "metric":"bleu", "score":0.2387260041 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zh", "task":"translation_to", "metric":"chrf", "score":0.288127087 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"arc", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"classification", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"mgsm", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"mmlu", "metric":"accuracy", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"translation_from", "metric":"bleu", "score":0.04772924 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"translation_from", "metric":"chrf", "score":0.237905051 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"translation_to", "metric":"bleu", "score":0.0 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"translation_to", "metric":"chrf", "score":0.1114053338 }, { "model":"qwen\/qwen3-32b", "bcp_47":"zu", "task":"truthfulqa", "metric":"accuracy", "score":0.0 } ]