diff --git "a/frontend/public/results.json" "b/frontend/public/results.json" --- "a/frontend/public/results.json" +++ "b/frontend/public/results.json" @@ -194,18 +194,6 @@ "translation_bleu": 0.37, "translation_chrf": 0.49 }, - { - "language_name": "French", - "speakers": 278611507, - "family": "Indo-European", - "average": 0.46, - "in_benchmark": true, - "NaN": 0.0, - "classification_accuracy": 0.53, - "language_modeling_chrf": 0.98, - "translation_bleu": 0.32, - "translation_chrf": 0.49 - }, { "language_name": "Chinese", "speakers": 1304678914, @@ -230,18 +218,6 @@ "translation_bleu": 0.32, "translation_chrf": 0.49 }, - { - "language_name": "Portuguese", - "speakers": 237496885, - "family": "Indo-European", - "average": 0.46, - "in_benchmark": true, - "NaN": 0.0, - "classification_accuracy": 0.5, - "language_modeling_chrf": 0.97, - "translation_bleu": 0.31, - "translation_chrf": 0.49 - }, { "language_name": "Spanish", "speakers": 493528077, @@ -254,6 +230,18 @@ "translation_bleu": 0.28, "translation_chrf": 0.46 }, + { + "language_name": "Arabic", + "speakers": 351664197, + "family": "Afro-Asiatic", + "average": 0.42, + "in_benchmark": true, + "NaN": 0.0, + "classification_accuracy": 0.43, + "language_modeling_chrf": 0.93, + "translation_bleu": 0.28, + "translation_chrf": 0.47 + }, { "language_name": "Urdu", "speakers": 290790290, @@ -267,29 +255,17 @@ "translation_chrf": 0.42 }, { - "language_name": "Punjabi", - "speakers": 203571210, + "language_name": "French", + "speakers": 278611507, "family": "Indo-European", - "average": 0.42, + "average": 0.46, "in_benchmark": true, "NaN": 0.0, - "classification_accuracy": 0.4, - "language_modeling_chrf": 0.87, - "translation_bleu": 0.34, + "classification_accuracy": 0.53, + "language_modeling_chrf": 0.98, + "translation_bleu": 0.32, "translation_chrf": 0.49 }, - { - "language_name": "Arabic", - "speakers": 351664197, - "family": "Afro-Asiatic", - "average": 0.42, - "in_benchmark": true, - "NaN": 0.0, - "classification_accuracy": 0.43, - "language_modeling_chrf": 0.93, - "translation_bleu": 0.28, - "translation_chrf": 0.47 - }, { "language_name": "Bangla", "speakers": 267193288, @@ -303,11 +279,35 @@ "translation_chrf": 0.41 }, { - "language_name": "Afar", - "speakers": 2119663, - "family": "Afro-Asiatic", + "language_name": "Portuguese", + "speakers": 237496885, + "family": "Indo-European", + "average": 0.46, + "in_benchmark": true, + "NaN": 0.0, + "classification_accuracy": 0.5, + "language_modeling_chrf": 0.97, + "translation_bleu": 0.31, + "translation_chrf": 0.49 + }, + { + "language_name": "Punjabi", + "speakers": 203571210, + "family": "Indo-European", + "average": 0.42, + "in_benchmark": true, + "NaN": 0.0, + "classification_accuracy": 0.4, + "language_modeling_chrf": 0.87, + "translation_bleu": 0.34, + "translation_chrf": 0.49 + }, + { + "language_name": "Russian", + "speakers": 195841151, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -315,11 +315,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Okanagan", - "speakers": 490, - "family": "Salishan", + "language_name": "Swahili", + "speakers": 171610296, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -327,11 +327,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Nzima", - "speakers": 293402, - "family": "Atlantic-Congo", + "language_name": "Indonesian", + "speakers": 171207687, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -339,8 +339,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Occitan", - "speakers": 2040398, + "language_name": "German", + "speakers": 136350226, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -351,11 +351,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ojibwa", - "speakers": 23747, - "family": "Algic", + "language_name": "Japanese", + "speakers": 119729026, + "family": "Japonic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -363,11 +363,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Oji-Cree", - "speakers": 15078, - "family": "Algic", + "language_name": "Telugu", + "speakers": 95478480, + "family": "Dravidian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -375,11 +375,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Odia", - "speakers": 42434880, + "language_name": "Western Panjabi", + "speakers": 93433552, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -387,9 +387,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Oromo", - "speakers": 34897121, - "family": "Afro-Asiatic", + "language_name": "Marathi", + "speakers": 92826300, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -399,11 +399,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Nyamwezi", - "speakers": 1932242, - "family": "Atlantic-Congo", + "language_name": "Javanese", + "speakers": 91180665, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -411,11 +411,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ossetic", - "speakers": 541444, - "family": "Indo-European", + "language_name": "Vietnamese", + "speakers": 86222962, + "family": "Austroasiatic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -423,11 +423,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Osage", - "speakers": 0, - "family": "Siouan", + "language_name": "Tamil", + "speakers": 85616159, + "family": "Dravidian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -435,9 +435,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Pangasinan", - "speakers": 1528534, - "family": "Austronesian", + "language_name": "Persian", + "speakers": 84710459, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -447,9 +447,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Pampanga", - "speakers": 2511163, - "family": "Austronesian", + "language_name": "Wu Chinese", + "speakers": 83641200, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -459,9 +459,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Papiamento", - "speakers": 211640, - "family": "Indo-European", + "language_name": "Turkish", + "speakers": 80360704, + "family": "Turkic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -471,11 +471,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Palauan", - "speakers": 16047, - "family": "Austronesian", + "language_name": "Cantonese", + "speakers": 79654759, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -483,11 +483,23 @@ "translation_chrf": 0.0 }, { - "language_name": "Picard", - "speakers": 746330, + "language_name": "Korean", + "speakers": 78357046, + "family": "Koreanic", + "average": 0.0, + "in_benchmark": true, + "NaN": 0.0, + "classification_accuracy": 0.0, + "language_modeling_chrf": 0.0, + "translation_bleu": 0.0, + "translation_chrf": 0.0 + }, + { + "language_name": "Italian", + "speakers": 70247060, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -495,11 +507,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Nyankole", - "speakers": 2724939, - "family": "Atlantic-Congo", + "language_name": "Filipino", + "speakers": 67471096, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -507,11 +519,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Navajo", - "speakers": 166320, - "family": "Athabaskan-Eyak-Tlingit", + "language_name": "Egyptian Arabic", + "speakers": 66639360, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -519,9 +531,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Nyanja", - "speakers": 17026781, - "family": "Atlantic-Congo", + "language_name": "Gujarati", + "speakers": 61721799, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -531,11 +543,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ngiemboon", - "speakers": 388430, - "family": "Atlantic-Congo", + "language_name": "Thai", + "speakers": 55181920, + "family": "Tai-Kadai", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -543,9 +555,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Huasteca Nahuatl", - "speakers": 501735, - "family": "Uto-Aztecan", + "language_name": "Pashto", + "speakers": 53542641, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -555,11 +567,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ngaju", - "speakers": 987996, - "family": "Austronesian", + "language_name": "Kannada", + "speakers": 49065330, + "family": "Dravidian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -567,9 +579,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Niuean", - "speakers": 1120, - "family": "Austronesian", + "language_name": "Nigerian Pidgin", + "speakers": 44945880, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -579,11 +591,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ao Naga", - "speakers": 305001, - "family": "Sino-Tibetan", + "language_name": "Malayalam", + "speakers": 43257484, + "family": "Dravidian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -591,8 +603,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Dutch", - "speakers": 31765645, + "language_name": "Odia", + "speakers": 42434880, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -603,9 +615,21 @@ "translation_chrf": 0.0 }, { - "language_name": "Kwasio", - "speakers": 8878, - "family": "Atlantic-Congo", + "language_name": "Polish", + "speakers": 41077399, + "family": "Indo-European", + "average": 0.0, + "in_benchmark": true, + "NaN": 0.0, + "classification_accuracy": 0.0, + "language_modeling_chrf": 0.0, + "translation_bleu": 0.0, + "translation_chrf": 0.0 + }, + { + "language_name": "Xiang Chinese", + "speakers": 40426580, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -615,9 +639,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Norwegian Nynorsk", - "speakers": 1366860, - "family": "Indo-European", + "language_name": "Hausa", + "speakers": 40411882, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -627,11 +651,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Norwegian", - "speakers": 5467440, + "language_name": "Sindhi", + "speakers": 40329510, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -639,9 +663,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Naxi", - "speakers": 334565, - "family": "Sino-Tibetan", + "language_name": "North Levantine Arabic", + "speakers": 39031474, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -651,11 +675,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Northern Thai", - "speakers": 6621830, - "family": "Tai-Kadai", + "language_name": "Malay", + "speakers": 38097307, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -663,11 +687,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Nimadi", - "speakers": 1723917, - "family": "Indo-European", + "language_name": "Burmese", + "speakers": 36559231, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -675,9 +699,9 @@ "translation_chrf": 0.0 }, { - "language_name": "N’Ko", - "speakers": 626370, - "family": "Artificial Language", + "language_name": "Amharic", + "speakers": 35728475, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -687,9 +711,9 @@ "translation_chrf": 0.0 }, { - "language_name": "South Ndebele", - "speakers": 903418, - "family": "Atlantic-Congo", + "language_name": "Algerian Arabic", + "speakers": 35667507, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -699,11 +723,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Naskapi", - "speakers": 1395, - "family": "Algic", + "language_name": "Oromo", + "speakers": 34897121, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -711,9 +735,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Northern Sotho", - "speakers": 5307578, - "family": "Atlantic-Congo", + "language_name": "Bhojpuri", + "speakers": 32934797, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -723,9 +747,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Nuer", - "speakers": 591427, - "family": "Nilotic", + "language_name": "Uzbek", + "speakers": 32792780, + "family": "Turkic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -735,11 +759,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Nigerian Pidgin", - "speakers": 44945880, - "family": "Indo-European", + "language_name": "Azerbaijani", + "speakers": 32446682, + "family": "Turkic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -747,9 +771,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Pijin", - "speakers": 561780, - "family": "Indo-European", + "language_name": "Hakka Chinese", + "speakers": 32062460, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -759,11 +783,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Pennsylvania German", - "speakers": 129729, - "family": "Indo-European", + "language_name": "Sundanese", + "speakers": 32043120, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -771,11 +795,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Rohingya", - "speakers": 1824082, + "language_name": "Dutch", + "speakers": 31765645, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -783,11 +807,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Riffian", - "speakers": 3692411, + "language_name": "Moroccan Arabic", + "speakers": 30938679, "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -795,11 +819,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Rajbanshi", - "speakers": 133443, + "language_name": "Ukrainian", + "speakers": 29348975, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -807,11 +831,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Rangpuri", - "speakers": 16274502, - "family": "Indo-European", + "language_name": "Yoruba", + "speakers": 28685568, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -819,8 +843,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Romansh", - "speakers": 42020, + "language_name": "Saraiki", + "speakers": 28020120, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -831,11 +855,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kalo Finnish Romani", - "speakers": 5015, - "family": "Indo-European", + "language_name": "Igbo", + "speakers": 27823640, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -843,9 +867,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sinte Romani", - "speakers": 24372, - "family": "Indo-European", + "language_name": "Min Nan Chinese", + "speakers": 26486380, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -855,11 +879,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Domari", - "speakers": 1613543, - "family": "Indo-European", + "language_name": "Cebuano", + "speakers": 26203440, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -867,11 +891,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tavringer Romani", - "speakers": 9488, - "family": "Speech Register", + "language_name": "Awadhi", + "speakers": 25862924, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -879,9 +903,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Rundi", - "speakers": 7475454, - "family": "Atlantic-Congo", + "language_name": "Malagasy", + "speakers": 24260130, + "family": "Austronesian", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -891,9 +915,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ronga", - "speakers": 1023339, - "family": "Atlantic-Congo", + "language_name": "Gan Chinese", + "speakers": 23698340, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -915,9 +939,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tae'", - "speakers": 293729, - "family": "Austronesian", + "language_name": "Bavarian", + "speakers": 22043627, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -927,11 +951,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Rombo", - "speakers": 433291, - "family": "Atlantic-Congo", + "language_name": "Nepali", + "speakers": 20903374, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -939,11 +963,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Rotuman", - "speakers": 2527, - "family": "Austronesian", + "language_name": "Maithili", + "speakers": 19249149, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -951,8 +975,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Russian", - "speakers": 195841151, + "language_name": "Assamese", + "speakers": 17239170, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -963,11 +987,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Rusyn", - "speakers": 527075, - "family": "Indo-European", + "language_name": "Nyanja", + "speakers": 17026781, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -975,11 +999,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Roviana", - "speakers": 9591, - "family": "Austronesian", + "language_name": "Somali", + "speakers": 16911645, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -987,9 +1011,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Riang (India)", - "speakers": 172392, - "family": "Sino-Tibetan", + "language_name": "Madurese", + "speakers": 16822638, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -999,9 +1023,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Romagnol", - "speakers": 0, - "family": "Indo-European", + "language_name": "Northeastern Thai", + "speakers": 16554576, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1011,8 +1035,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Plautdietsch", - "speakers": 90466, + "language_name": "Rangpuri", + "speakers": 16274502, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1023,11 +1047,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Rejang", - "speakers": 1228320, - "family": "Austronesian", + "language_name": "Magahi", + "speakers": 15913080, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1035,8 +1059,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Palatine German", - "speakers": 0, + "language_name": "Haryanvi", + "speakers": 15913080, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1047,9 +1071,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lomwe", - "speakers": 2046678, - "family": "Atlantic-Congo", + "language_name": "Marwari", + "speakers": 15913080, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1059,11 +1083,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Pökoot", - "speakers": 369343, - "family": "Nilotic", + "language_name": "Serbian", + "speakers": 15602410, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1071,8 +1095,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Polish", - "speakers": 41077399, + "language_name": "Sinhala", + "speakers": 15564656, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -1083,11 +1107,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Piedmontese", - "speakers": 6178, - "family": "Indo-European", + "language_name": "Khmer", + "speakers": 15065030, + "family": "Austroasiatic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1095,11 +1119,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Pontic", - "speakers": 392463, + "language_name": "Chhattisgarhi", + "speakers": 14586990, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1107,11 +1131,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Pohnpeian", - "speakers": 23560, - "family": "Austronesian", + "language_name": "Nigerian Fulfulde", + "speakers": 14339876, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1119,11 +1143,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Maliseet-Passamaquoddy", - "speakers": 490, - "family": "Algic", + "language_name": "Zulu", + "speakers": 13973830, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1131,11 +1155,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Parsi-Dari", - "speakers": 864342, - "family": "Bookkeeping", + "language_name": "Kazakh", + "speakers": 13637392, + "family": "Turkic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1143,8 +1167,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Prussian", - "speakers": 38, + "language_name": "Deccan", + "speakers": 13128291, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1155,11 +1179,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Pashto", - "speakers": 53542641, + "language_name": "Czech", + "speakers": 13045532, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1167,11 +1191,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Punu", - "speakers": 200782, - "family": "Atlantic-Congo", + "language_name": "Swedish", + "speakers": 12932871, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1179,11 +1203,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Quechua", - "speakers": 11385851, - "family": "Quechuan", + "language_name": "Hungarian", + "speakers": 12443430, + "family": "Uralic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1191,11 +1215,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kʼicheʼ", - "speakers": 1200731, - "family": "Mayan", + "language_name": "Greek", + "speakers": 12292242, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1203,11 +1227,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Chimborazo Highland Quichua", - "speakers": 963579, - "family": "Quechuan", + "language_name": "Shona", + "speakers": 11782503, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1215,8 +1239,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Rajasthani", - "speakers": 1326090, + "language_name": "Low German", + "speakers": 11520008, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1227,9 +1251,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Réunion Creole French", - "speakers": 559185, - "family": "Indo-European", + "language_name": "Akan", + "speakers": 11442678, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1239,9 +1263,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Eastern Huasteca Nahuatl", - "speakers": 501735, - "family": "Uto-Aztecan", + "language_name": "Quechua", + "speakers": 11385851, + "family": "Quechuan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1251,11 +1275,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Newari", - "speakers": 1000821, - "family": "Sino-Tibetan", + "language_name": "Central Kurdish", + "speakers": 11086549, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1263,11 +1287,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ndonga", - "speakers": 552315, + "language_name": "Kinyarwanda", + "speakers": 11083625, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1275,11 +1299,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mende", - "speakers": 1813083, - "family": "Mande", + "language_name": "Wolof", + "speakers": 11025494, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1287,9 +1311,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Maithili", - "speakers": 19249149, - "family": "Indo-European", + "language_name": "Tunisian Arabic", + "speakers": 10549080, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -1299,11 +1323,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Makasar", - "speakers": 1949290, + "language_name": "Iloko", + "speakers": 10481376, "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1311,11 +1335,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mandingo", - "speakers": 3511762, - "family": "Mande", + "language_name": "Xhosa", + "speakers": 10182944, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1323,11 +1347,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Masai", - "speakers": 1734738, - "family": "Nilotic", + "language_name": "Tigrinya", + "speakers": 10145911, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1335,11 +1359,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Central Mazahua", - "speakers": 437410, - "family": "Otomanguean", + "language_name": "Belarusian", + "speakers": 10064517, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1347,11 +1371,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Moksha", - "speakers": 297616, - "family": "Uralic", + "language_name": "Luba-Lulua", + "speakers": 9770880, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1359,11 +1383,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Maguindanaon", - "speakers": 1310172, - "family": "Austronesian", + "language_name": "Tajik", + "speakers": 9644223, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1371,11 +1395,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mandar", - "speakers": 245664, - "family": "Austronesian", + "language_name": "Umbundu", + "speakers": 9431467, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1383,11 +1407,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Meru", - "speakers": 2141116, - "family": "Atlantic-Congo", + "language_name": "Bambara", + "speakers": 9385632, + "family": "Mande", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1395,11 +1419,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Rwa", - "speakers": 128816, - "family": "Atlantic-Congo", + "language_name": "Afrikaans", + "speakers": 9318845, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1407,9 +1431,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Hassaniyya", - "speakers": 7239, - "family": "Afro-Asiatic", + "language_name": "Hiligaynon", + "speakers": 9171204, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1419,11 +1443,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Pattani Malay", - "speakers": 3448870, - "family": "Austronesian", + "language_name": "Kikuyu", + "speakers": 9099743, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1431,11 +1455,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Morisyen", - "speakers": 1241433, + "language_name": "Haitian Creole", + "speakers": 8964918, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1443,11 +1467,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mandjak", - "speakers": 121170, - "family": "Atlantic-Congo", + "language_name": "Catalan", + "speakers": 8679139, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1455,9 +1479,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Malagasy", - "speakers": 24260130, - "family": "Austronesian", + "language_name": "Hebrew", + "speakers": 8675480, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -1467,9 +1491,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Makhuwa-Meetto", - "speakers": 1354419, - "family": "Atlantic-Congo", + "language_name": "Sichuan Yi", + "speakers": 8364120, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1479,11 +1503,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Metaʼ", - "speakers": 130401, + "language_name": "Mossi", + "speakers": 8334160, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1491,9 +1515,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Eastern Magar", - "speakers": 333607, - "family": "Sino-Tibetan", + "language_name": "Baluchi", + "speakers": 8227887, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1503,11 +1527,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Magahi", - "speakers": 15913080, + "language_name": "Sylheti", + "speakers": 8132550, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1515,11 +1539,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mafa", - "speakers": 205313, - "family": "Afro-Asiatic", + "language_name": "Kimbundu", + "speakers": 8130575, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1527,11 +1551,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Madurese", - "speakers": 16822638, - "family": "Austronesian", + "language_name": "Uyghur", + "speakers": 8052967, + "family": "Turkic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1539,11 +1563,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Laz", - "speakers": 22965, - "family": "Kartvelian", + "language_name": "Minangkabau", + "speakers": 8010780, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1551,8 +1575,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Lambadi", - "speakers": 3580443, + "language_name": "Swiss German", + "speakers": 7956952, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1563,8 +1587,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Lombard", - "speakers": 3901518, + "language_name": "Bulgarian", + "speakers": 7878315, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -1575,9 +1599,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lingala", - "speakers": 3514491, - "family": "Atlantic-Congo", + "language_name": "Standard Moroccan Tamazight", + "speakers": 7823574, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -1587,11 +1611,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Lao", - "speakers": 5138706, - "family": "Tai-Kadai", + "language_name": "Fula", + "speakers": 7788904, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1599,11 +1623,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mongo", - "speakers": 620858, - "family": "Atlantic-Congo", + "language_name": "Bosnian", + "speakers": 7594468, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1611,11 +1635,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Lozi", - "speakers": 1045596, + "language_name": "Rundi", + "speakers": 7475454, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1623,8 +1647,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Northern Luri", - "speakers": 2020512, + "language_name": "Kanauji", + "speakers": 7426104, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1635,9 +1659,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lithuanian", - "speakers": 2488617, - "family": "Indo-European", + "language_name": "Santali", + "speakers": 7293495, + "family": "Austroasiatic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -1647,8 +1671,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Latgalian", - "speakers": 167429, + "language_name": "Danish", + "speakers": 7072056, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -1659,11 +1683,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Luba-Katanga", - "speakers": 2340940, - "family": "Atlantic-Congo", + "language_name": "Turkmen", + "speakers": 6870838, + "family": "Turkic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1671,9 +1695,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Luba-Lulua", - "speakers": 9770880, - "family": "Atlantic-Congo", + "language_name": "Kurdish", + "speakers": 6866757, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -1683,9 +1707,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Luo (Kenya and Tanzania)", - "speakers": 5245734, - "family": "Nilotic", + "language_name": "Croatian", + "speakers": 6813164, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -1695,11 +1719,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Luyia", - "speakers": 5888069, - "family": "Atlantic-Congo", + "language_name": "Albanian", + "speakers": 6791906, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1707,11 +1731,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Southern Luri", - "speakers": 1019080, + "language_name": "Slovak", + "speakers": 6680269, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1719,9 +1743,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Latvian", - "speakers": 1147550, - "family": "Indo-European", + "language_name": "Dyula", + "speakers": 6667328, + "family": "Mande", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -1731,9 +1755,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Eastern Lawa", - "speakers": 6898, - "family": "Austroasiatic", + "language_name": "Northern Thai", + "speakers": 6621830, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1743,11 +1767,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Literary Chinese", - "speakers": 0, - "family": "Sino-Tibetan", + "language_name": "Mongolian", + "speakers": 6572846, + "family": "Mongolic-Khitan", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1755,11 +1779,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mbunga", - "speakers": 819739, + "language_name": "Southern Sotho", + "speakers": 6390567, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1767,9 +1791,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Marshallese", - "speakers": 56879, - "family": "Austronesian", + "language_name": "Krio", + "speakers": 6293684, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1779,11 +1803,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Māori", - "speakers": 137913, - "family": "Austronesian", + "language_name": "Tachelhit", + "speakers": 6187736, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1791,11 +1815,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Indus Kohistani", - "speakers": 326901, - "family": "Indo-European", + "language_name": "Tswana", + "speakers": 6113428, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1803,8 +1827,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Marwari", - "speakers": 15913080, + "language_name": "Mewati", + "speakers": 6100014, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1815,9 +1839,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mentawai", - "speakers": 64086, - "family": "Austronesian", + "language_name": "Luyia", + "speakers": 5888069, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1827,11 +1851,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Manyika", - "speakers": 945510, - "family": "Atlantic-Congo", + "language_name": "Guarani", + "speakers": 5827107, + "family": "Tupian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1839,9 +1863,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Burmese", - "speakers": 36559231, - "family": "Sino-Tibetan", + "language_name": "Finnish", + "speakers": 5736842, + "family": "Uralic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -1851,11 +1875,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Erzya", - "speakers": 439338, - "family": "Uralic", + "language_name": "Ganda", + "speakers": 5622890, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1863,9 +1887,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Masaaba", - "speakers": 1254337, - "family": "Atlantic-Congo", + "language_name": "Betawi", + "speakers": 5607546, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1875,11 +1899,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mazanderani", - "speakers": 4246165, + "language_name": "Kashmiri", + "speakers": 5598085, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1887,9 +1911,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Nauru", - "speakers": 6930, - "family": "Austronesian", + "language_name": "Southern Thai", + "speakers": 5518192, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -1899,11 +1923,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Min Nan Chinese", - "speakers": 26486380, - "family": "Sino-Tibetan", + "language_name": "Norwegian Bokmål", + "speakers": 5468932, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1911,8 +1935,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Neapolitan", - "speakers": 605306, + "language_name": "Norwegian", + "speakers": 5467440, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1923,11 +1947,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Nama", - "speakers": 289308, - "family": "Khoe-Kwadi", + "language_name": "Bemba", + "speakers": 5402246, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1935,8 +1959,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Norwegian Bokmål", - "speakers": 5468932, + "language_name": "Armenian", + "speakers": 5317273, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -1947,11 +1971,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Central Huasteca Nahuatl", - "speakers": 244435, - "family": "Uto-Aztecan", + "language_name": "Northern Sotho", + "speakers": 5307578, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1959,11 +1983,35 @@ "translation_chrf": 0.0 }, { - "language_name": "North Ndebele", - "speakers": 1745556, - "family": "Atlantic-Congo", + "language_name": "Luo (Kenya and Tanzania)", + "speakers": 5245734, + "family": "Nilotic", + "average": 0.0, + "in_benchmark": true, + "NaN": 0.0, + "classification_accuracy": 0.0, + "language_modeling_chrf": 0.0, + "translation_bleu": 0.0, + "translation_chrf": 0.0 + }, + { + "language_name": "Tok Pisin", + "speakers": 5154217, + "family": "Indo-European", + "average": 0.0, + "in_benchmark": true, + "NaN": 0.0, + "classification_accuracy": 0.0, + "language_modeling_chrf": 0.0, + "translation_bleu": 0.0, + "translation_chrf": 0.0 + }, + { + "language_name": "Lao", + "speakers": 5138706, + "family": "Tai-Kadai", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -1971,8 +2019,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Ndau", - "speakers": 3867046, + "language_name": "Sukuma", + "speakers": 5094094, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -1983,8 +2031,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Low German", - "speakers": 11520008, + "language_name": "Konkani", + "speakers": 4906533, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -1995,9 +2043,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Nepali", - "speakers": 20903374, - "family": "Indo-European", + "language_name": "Tsonga", + "speakers": 4880932, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2007,9 +2055,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kita Maninkakan", - "speakers": 977670, - "family": "Mande", + "language_name": "Main-Franconian", + "speakers": 4809582, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2019,11 +2067,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Muscogee", - "speakers": 3992, - "family": "Muskogean", + "language_name": "Balinese", + "speakers": 4806468, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2031,11 +2079,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mi'kmaw", - "speakers": 7916, - "family": "Algic", + "language_name": "Ewe", + "speakers": 4690857, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2043,9 +2091,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mundang", - "speakers": 277450, - "family": "Atlantic-Congo", + "language_name": "Zhuang", + "speakers": 4321462, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2055,8 +2103,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Minangkabau", - "speakers": 8010780, + "language_name": "Buginese", + "speakers": 4298211, "family": "Austronesian", "average": 0.0, "in_benchmark": true, @@ -2067,11 +2115,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Macedonian", - "speakers": 1608565, + "language_name": "Mazanderani", + "speakers": 4246165, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2079,9 +2127,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Malayalam", - "speakers": 43257484, - "family": "Dravidian", + "language_name": "Goan Konkani", + "speakers": 4243488, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2091,11 +2139,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Masalit", - "speakers": 451060, - "family": "Maban", + "language_name": "Kamba", + "speakers": 4068120, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2103,11 +2151,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mongolian", - "speakers": 6572846, - "family": "Mongolic-Khitan", + "language_name": "Kalenjin", + "speakers": 4068120, + "family": "Nilotic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2115,9 +2163,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Manipuri", - "speakers": 1476591, - "family": "Sino-Tibetan", + "language_name": "Banjar", + "speakers": 4010288, + "family": "Austronesian", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2127,9 +2175,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mon", - "speakers": 966114, - "family": "Austroasiatic", + "language_name": "Northern Hindko", + "speakers": 3969517, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2139,9 +2187,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Innu-aimun", - "speakers": 12062, - "family": "Algic", + "language_name": "Makhuwa", + "speakers": 3912766, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2151,9 +2199,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mohawk", - "speakers": 1772, - "family": "Iroquoian", + "language_name": "Gilaki", + "speakers": 3906472, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2163,20 +2211,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Mossi", - "speakers": 8334160, - "family": "Atlantic-Congo", - "average": 0.0, - "in_benchmark": true, - "NaN": 0.0, - "classification_accuracy": 0.0, - "language_modeling_chrf": 0.0, - "translation_bleu": 0.0, - "translation_chrf": 0.0 - }, - { - "language_name": "Marathi", - "speakers": 92826300, + "language_name": "Lombard", + "speakers": 3901518, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -2187,9 +2223,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Magar", - "speakers": 251722, - "family": "Sino-Tibetan", + "language_name": "Zarma", + "speakers": 3871308, + "family": "Songhay", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2199,9 +2235,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Mari", - "speakers": 29762, - "family": "Uralic", + "language_name": "Ndau", + "speakers": 3867046, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2211,9 +2247,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mru", - "speakers": 29277, - "family": "Sino-Tibetan", + "language_name": "Sidamo", + "speakers": 3783955, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2223,8 +2259,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Malay", - "speakers": 38097307, + "language_name": "Achinese", + "speakers": 3738364, "family": "Austronesian", "average": 0.0, "in_benchmark": true, @@ -2235,11 +2271,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Maltese", - "speakers": 457267, - "family": "Afro-Asiatic", + "language_name": "Shekhawati", + "speakers": 3713052, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2247,9 +2283,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mewari", - "speakers": 1286307, - "family": "Indo-European", + "language_name": "Riffian", + "speakers": 3692411, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2259,9 +2295,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kinyarwanda", - "speakers": 11083625, - "family": "Atlantic-Congo", + "language_name": "Shan", + "speakers": 3687984, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2271,9 +2307,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Safaliba", - "speakers": 4108, - "family": "Atlantic-Congo", + "language_name": "Lambadi", + "speakers": 3580443, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2283,9 +2319,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Central Okinawan", - "speakers": 966404, - "family": "Japonic", + "language_name": "Garhwali", + "speakers": 3580443, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2295,11 +2331,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Muslim Tat", - "speakers": 22453, - "family": "Indo-European", + "language_name": "Georgian", + "speakers": 3543646, + "family": "Kartvelian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2307,11 +2343,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tuvalu", - "speakers": 9868, - "family": "Austronesian", + "language_name": "Galician", + "speakers": 3515530, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2319,11 +2355,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tasawaq", - "speakers": 7970, - "family": "Songhay", + "language_name": "Lingala", + "speakers": 3514491, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2331,9 +2367,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tahitian", - "speakers": 91488, - "family": "Austronesian", + "language_name": "Mandingo", + "speakers": 3511762, + "family": "Mande", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2343,9 +2379,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tuvinian", - "speakers": 184239, - "family": "Turkic", + "language_name": "Central Atlas Tamazight", + "speakers": 3485047, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2355,9 +2391,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Central Atlas Tamazight", - "speakers": 3485047, - "family": "Afro-Asiatic", + "language_name": "Pattani Malay", + "speakers": 3448870, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2367,9 +2403,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Udmurt", - "speakers": 538544, - "family": "Uralic", + "language_name": "Tiv", + "speakers": 3424448, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2379,9 +2415,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Uyghur", - "speakers": 8052967, - "family": "Turkic", + "language_name": "Kabyle", + "speakers": 3351886, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2391,9 +2427,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ukrainian", - "speakers": 29348975, - "family": "Indo-European", + "language_name": "Kyrgyz", + "speakers": 3338267, + "family": "Turkic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2403,8 +2439,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Ulithian", - "speakers": 2971, + "language_name": "Bikol", + "speakers": 3275430, "family": "Austronesian", "average": 0.0, "in_benchmark": false, @@ -2415,8 +2451,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Umbundu", - "speakers": 9431467, + "language_name": "Fon", + "speakers": 3216150, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": true, @@ -2427,9 +2463,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mundari", - "speakers": 1252287, - "family": "Austroasiatic", + "language_name": "Gondi", + "speakers": 3182616, + "family": "Dravidian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2439,11 +2475,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Munda", - "speakers": 636523, - "family": "Bookkeeping", + "language_name": "Waray", + "speakers": 3166927, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2451,11 +2487,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Uzbek", - "speakers": 32792780, - "family": "Turkic", + "language_name": "Southern Kurdish", + "speakers": 3142162, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2463,9 +2499,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Vai", - "speakers": 131906, - "family": "Mande", + "language_name": "Brahui", + "speakers": 3035513, + "family": "Dravidian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2475,8 +2511,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Venda", - "speakers": 1391759, + "language_name": "Baoulé", + "speakers": 3022921, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -2487,9 +2523,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Venetian", - "speakers": 1380829, - "family": "Indo-European", + "language_name": "Tibetan", + "speakers": 3006697, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2499,9 +2535,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Veps", - "speakers": 3543, - "family": "Uralic", + "language_name": "Ibibio", + "speakers": 2996392, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2511,11 +2547,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tumbuka", - "speakers": 1780514, + "language_name": "Efik", + "speakers": 2996392, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2523,11 +2559,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Northeastern Thai", - "speakers": 16554576, - "family": "Tai-Kadai", + "language_name": "Sango", + "speakers": 2935521, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2535,8 +2571,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Virgin Islands Creole English", - "speakers": 3113, + "language_name": "Kumaoni", + "speakers": 2917398, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -2547,11 +2583,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tooro", - "speakers": 821807, - "family": "Atlantic-Congo", + "language_name": "Aymara", + "speakers": 2838620, + "family": "Aymaran", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2559,9 +2595,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kathoriya Tharu", - "speakers": 72787, - "family": "Indo-European", + "language_name": "Nyankole", + "speakers": 2724939, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2571,8 +2607,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Talysh", - "speakers": 1000168, + "language_name": "Jamaican Creole English", + "speakers": 2668142, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -2583,11 +2619,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tamashek", - "speakers": 1776965, - "family": "Afro-Asiatic", + "language_name": "Dogri", + "speakers": 2652180, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2595,11 +2631,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tswana", - "speakers": 6113428, + "language_name": "Gusii", + "speakers": 2622867, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2607,9 +2643,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ménik", - "speakers": 3305, - "family": "Atlantic-Congo", + "language_name": "Sasak", + "speakers": 2590152, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2619,9 +2655,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tongan", - "speakers": 100790, - "family": "Austronesian", + "language_name": "Kurukh", + "speakers": 2519571, + "family": "Dravidian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2631,9 +2667,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Nyasa Tonga", - "speakers": 207727, - "family": "Atlantic-Congo", + "language_name": "Pampanga", + "speakers": 2511163, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2643,11 +2679,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tok Pisin", - "speakers": 5154217, - "family": "Indo-European", + "language_name": "West Albay Bikol", + "speakers": 2511163, + "family": "Austronesian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2655,9 +2691,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Turkish", - "speakers": 80360704, - "family": "Turkic", + "language_name": "Lithuanian", + "speakers": 2488617, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2667,8 +2703,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Turoyo", - "speakers": 3035, + "language_name": "Beja", + "speakers": 2460326, "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, @@ -2679,8 +2715,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Taroko", - "speakers": 4721, + "language_name": "Batak Toba", + "speakers": 2456639, "family": "Austronesian", "average": 0.0, "in_benchmark": false, @@ -2691,8 +2727,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Torwali", - "speakers": 123756, + "language_name": "Sadri", + "speakers": 2386962, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -2703,21 +2739,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tsonga", - "speakers": 4880932, - "family": "Atlantic-Congo", - "average": 0.0, - "in_benchmark": true, - "NaN": 0.0, - "classification_accuracy": 0.0, - "language_modeling_chrf": 0.0, - "translation_bleu": 0.0, - "translation_chrf": 0.0 - }, - { - "language_name": "Tsakonian", - "speakers": 202, - "family": "Indo-European", + "language_name": "Waddar", + "speakers": 2386962, + "family": "Dravidian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2727,9 +2751,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tausug", - "speakers": 1200991, - "family": "Austronesian", + "language_name": "Luba-Katanga", + "speakers": 2340940, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2739,9 +2763,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tshangla", - "speakers": 117348, - "family": "Sino-Tibetan", + "language_name": "Chiga", + "speakers": 2335662, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2751,11 +2775,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tatar", - "speakers": 1984108, - "family": "Turkic", + "language_name": "Soga", + "speakers": 2292409, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2763,9 +2787,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Vietnamese", - "speakers": 86222962, - "family": "Austroasiatic", + "language_name": "Swati", + "speakers": 2212379, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2775,8 +2799,8 @@ "translation_chrf": 0.0 }, { - "language_name": "West Flemish", - "speakers": 1172070, + "language_name": "Hazaragi", + "speakers": 2161984, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -2787,9 +2811,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tokelau", - "speakers": 1285, - "family": "Austronesian", + "language_name": "Meru", + "speakers": 2141116, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2811,9 +2835,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sherpa", - "speakers": 157705, - "family": "Sino-Tibetan", + "language_name": "Afar", + "speakers": 2119663, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2823,9 +2847,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Yao", - "speakers": 722357, - "family": "Atlantic-Congo", + "language_name": "Teso", + "speakers": 2082973, + "family": "Nilotic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2835,9 +2859,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Yapese", - "speakers": 6556, - "family": "Austronesian", + "language_name": "Lomwe", + "speakers": 2046678, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2847,11 +2871,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Yangben", - "speakers": 2303, - "family": "Atlantic-Congo", + "language_name": "Occitan", + "speakers": 2040398, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2859,9 +2883,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Yemba", - "speakers": 443920, - "family": "Atlantic-Congo", + "language_name": "Western Balochi", + "speakers": 2037382, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2871,11 +2895,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Yiddish", - "speakers": 997214, + "language_name": "Northern Luri", + "speakers": 2020512, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2883,11 +2907,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Yoruba", - "speakers": 28685568, - "family": "Atlantic-Congo", + "language_name": "Wagdi", + "speakers": 1989135, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2895,9 +2919,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Nheengatu", - "speakers": 26171, - "family": "Tupian", + "language_name": "Tulu", + "speakers": 1989135, + "family": "Dravidian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2907,9 +2931,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Yucateco", - "speakers": 861955, - "family": "Mayan", + "language_name": "Khandesi", + "speakers": 1989135, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2919,9 +2943,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Cantonese", - "speakers": 79654759, - "family": "Sino-Tibetan", + "language_name": "Tatar", + "speakers": 1984108, + "family": "Turkic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2931,11 +2955,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Zhuang", - "speakers": 4321462, - "family": "Tai-Kadai", + "language_name": "Slovenian", + "speakers": 1973181, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -2943,9 +2967,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Zaghawa", - "speakers": 232364, - "family": "Saharan", + "language_name": "Makasar", + "speakers": 1949290, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2955,9 +2979,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ngazidja Comorian", - "speakers": 313124, - "family": "Atlantic-Congo", + "language_name": "Wolaytta", + "speakers": 1946034, + "family": "Ta-Ne-Omotic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2967,9 +2991,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Zeelandic", - "speakers": 241926, - "family": "Indo-European", + "language_name": "Nyamwezi", + "speakers": 1932242, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -2979,9 +3003,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Standard Moroccan Tamazight", - "speakers": 7823574, - "family": "Afro-Asiatic", + "language_name": "Bodo", + "speakers": 1856526, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -2991,8 +3015,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Negeri Sembilan Malay", - "speakers": 391825, + "language_name": "Lampung Api", + "speakers": 1842479, "family": "Austronesian", "average": 0.0, "in_benchmark": false, @@ -3003,9 +3027,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Zulu", - "speakers": 13973830, - "family": "Atlantic-Congo", + "language_name": "Chuvash", + "speakers": 1842386, + "family": "Turkic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -3015,11 +3039,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Soga", - "speakers": 2292409, - "family": "Atlantic-Congo", + "language_name": "Bashkir", + "speakers": 1842386, + "family": "Turkic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3027,9 +3051,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mingrelian", - "speakers": 439670, - "family": "Kartvelian", + "language_name": "Rohingya", + "speakers": 1824082, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3039,9 +3063,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Main-Franconian", - "speakers": 4809582, - "family": "Indo-European", + "language_name": "Mende", + "speakers": 1813083, + "family": "Mande", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3051,8 +3075,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Xhosa", - "speakers": 10182944, + "language_name": "Tumbuka", + "speakers": 1780514, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": true, @@ -3063,9 +3087,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Makhuwa", - "speakers": 3912766, - "family": "Atlantic-Congo", + "language_name": "Tamashek", + "speakers": 1776965, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3075,9 +3099,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Votic", - "speakers": 0, - "family": "Uralic", + "language_name": "North Ndebele", + "speakers": 1745556, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3087,9 +3111,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Võro", - "speakers": 70031, - "family": "Uralic", + "language_name": "Masai", + "speakers": 1734738, + "family": "Nilotic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3099,8 +3123,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Vunjo", - "speakers": 433291, + "language_name": "Serer", + "speakers": 1731004, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -3111,8 +3135,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Walloon", - "speakers": 679801, + "language_name": "Nimadi", + "speakers": 1723917, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -3123,9 +3147,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Walser", - "speakers": 11377, - "family": "Indo-European", + "language_name": "Timne", + "speakers": 1722482, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3135,9 +3159,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Wolaytta", - "speakers": 1946034, - "family": "Ta-Ne-Omotic", + "language_name": "Scots", + "speakers": 1644028, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3147,21 +3171,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Waray", - "speakers": 3166927, - "family": "Austronesian", - "average": 0.0, - "in_benchmark": true, - "NaN": 0.0, - "classification_accuracy": 0.0, - "language_modeling_chrf": 0.0, - "translation_bleu": 0.0, - "translation_chrf": 0.0 - }, - { - "language_name": "Warlpiri", - "speakers": 2496, - "family": "Pama-Nyungan", + "language_name": "Lango (Uganda)", + "speakers": 1643614, + "family": "Nilotic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3171,9 +3183,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Waddar", - "speakers": 2386962, - "family": "Dravidian", + "language_name": "Domari", + "speakers": 1613543, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3183,11 +3195,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Wagdi", - "speakers": 1989135, + "language_name": "Macedonian", + "speakers": 1608565, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3195,9 +3207,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Wallisian", - "speakers": 9512, - "family": "Austronesian", + "language_name": "Acoli", + "speakers": 1600361, + "family": "Nilotic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3207,8 +3219,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Ndzwani Comorian", - "speakers": 287736, + "language_name": "Central-Eastern Niger Fulfulde", + "speakers": 1594068, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -3219,11 +3231,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Wolof", - "speakers": 11025494, - "family": "Atlantic-Congo", + "language_name": "Bhili", + "speakers": 1591308, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3231,11 +3243,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Mewati", - "speakers": 6100014, - "family": "Indo-European", + "language_name": "Pangasinan", + "speakers": 1528534, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3243,9 +3255,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Wu Chinese", - "speakers": 83641200, - "family": "Sino-Tibetan", + "language_name": "Kongo", + "speakers": 1526700, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3255,9 +3267,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Xavánte", - "speakers": 9951, - "family": "Nuclear-Macro-Je", + "language_name": "Bini", + "speakers": 1519599, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3267,9 +3279,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tsakhur", - "speakers": 16329, - "family": "Nakh-Daghestanian", + "language_name": "Maasina Fulfulde", + "speakers": 1505612, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3279,9 +3291,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Turkmen", - "speakers": 6870838, - "family": "Turkic", + "language_name": "Manipuri", + "speakers": 1476591, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -3291,11 +3303,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Sanskrit", - "speakers": 15913, - "family": "Indo-European", + "language_name": "Abron", + "speakers": 1467010, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3303,9 +3315,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Koyraboro Senni", - "speakers": 664816, - "family": "Songhay", + "language_name": "Makonde", + "speakers": 1463820, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3315,8 +3327,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Samogitian", - "speakers": 0, + "language_name": "Gheg Albanian", + "speakers": 1430250, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -3327,9 +3339,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tachelhit", - "speakers": 6187736, - "family": "Afro-Asiatic", + "language_name": "Venda", + "speakers": 1391759, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3339,11 +3351,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Shan", - "speakers": 3687984, - "family": "Tai-Kadai", + "language_name": "Sena", + "speakers": 1384517, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3351,8 +3363,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Sinhala", - "speakers": 15564656, + "language_name": "Venetian", + "speakers": 1380829, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -3363,9 +3375,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sidamo", - "speakers": 3783955, - "family": "Afro-Asiatic", + "language_name": "Susu", + "speakers": 1378014, + "family": "Mande", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3375,8 +3387,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Slovak", - "speakers": 6680269, + "language_name": "Norwegian Nynorsk", + "speakers": 1366860, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -3387,9 +3399,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Saraiki", - "speakers": 28020120, - "family": "Indo-European", + "language_name": "Makhuwa-Meetto", + "speakers": 1354419, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3399,20 +3411,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Slovenian", - "speakers": 1973181, - "family": "Indo-European", - "average": 0.0, - "in_benchmark": true, - "NaN": 0.0, - "classification_accuracy": 0.0, - "language_modeling_chrf": 0.0, - "translation_bleu": 0.0, - "translation_chrf": 0.0 - }, - { - "language_name": "Lower Silesian", - "speakers": 11868, + "language_name": "Rajasthani", + "speakers": 1326090, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -3423,9 +3423,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Selayar", - "speakers": 144194, - "family": "Austronesian", + "language_name": "Ho", + "speakers": 1312829, + "family": "Austroasiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3435,11 +3435,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Samoan", - "speakers": 252717, + "language_name": "Maguindanaon", + "speakers": 1310172, "family": "Austronesian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3447,9 +3447,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Southern Sami", - "speakers": 296, - "family": "Uralic", + "language_name": "Mewari", + "speakers": 1286307, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3459,9 +3459,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lule Sami", - "speakers": 1530, - "family": "Uralic", + "language_name": "Bulu", + "speakers": 1276270, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3471,9 +3471,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Inari Sami", - "speakers": 613, - "family": "Uralic", + "language_name": "Masaaba", + "speakers": 1254337, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3483,9 +3483,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Skolt Sami", - "speakers": 613, - "family": "Uralic", + "language_name": "Mundari", + "speakers": 1252287, + "family": "Austroasiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3495,11 +3495,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Shona", - "speakers": 11782503, - "family": "Atlantic-Congo", + "language_name": "Morisyen", + "speakers": 1241433, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3507,11 +3507,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Noon", - "speakers": 37767, - "family": "Atlantic-Congo", + "language_name": "Irish", + "speakers": 1237487, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3519,11 +3519,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Sango", - "speakers": 2935521, - "family": "Atlantic-Congo", + "language_name": "Fur", + "speakers": 1230163, + "family": "Furan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3531,9 +3531,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Seri", - "speakers": 901, - "family": null, + "language_name": "Rejang", + "speakers": 1228320, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3543,11 +3543,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Somali", - "speakers": 16911645, - "family": "Afro-Asiatic", + "language_name": "Bhilali", + "speakers": 1220003, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3555,9 +3555,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sena", - "speakers": 1384517, - "family": "Atlantic-Congo", + "language_name": "Tausug", + "speakers": 1200991, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3567,9 +3567,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Laki", - "speakers": 645417, - "family": "Indo-European", + "language_name": "Kʼicheʼ", + "speakers": 1200731, + "family": "Mayan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3579,9 +3579,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Yakut", - "speakers": 453510, - "family": "Turkic", + "language_name": "Bakhtiari", + "speakers": 1188926, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3591,9 +3591,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Samburu", - "speakers": 246228, - "family": "Nilotic", + "language_name": "Kpelle", + "speakers": 1186303, + "family": "Mande", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3603,9 +3603,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sasak", - "speakers": 2590152, - "family": "Austronesian", + "language_name": "Cebaara Senoufo", + "speakers": 1181687, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3615,11 +3615,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Santali", - "speakers": 7293495, + "language_name": "Northern Khmer", + "speakers": 1172616, "family": "Austroasiatic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3627,9 +3627,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Saafi-Saafi", - "speakers": 236046, - "family": "Atlantic-Congo", + "language_name": "West Flemish", + "speakers": 1172070, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3639,9 +3639,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Saurashtra", - "speakers": 384566, - "family": "Indo-European", + "language_name": "Soninke", + "speakers": 1153651, + "family": "Mande", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3651,9 +3651,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sangu", - "speakers": 117106, - "family": "Atlantic-Congo", + "language_name": "Zaza", + "speakers": 1148245, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3663,8 +3663,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Sardinian", - "speakers": 1060846, + "language_name": "Latvian", + "speakers": 1147550, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -3675,9 +3675,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sadri", - "speakers": 2386962, - "family": "Indo-European", + "language_name": "Dan", + "speakers": 1099244, + "family": "Mande", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3687,11 +3687,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Sicilian", - "speakers": 511702, - "family": "Indo-European", + "language_name": "Gorontalo", + "speakers": 1094807, + "family": "Austronesian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3699,9 +3699,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Scots", - "speakers": 1644028, - "family": "Indo-European", + "language_name": "Tigre", + "speakers": 1094616, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3711,9 +3711,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sindhi", - "speakers": 40329510, - "family": "Indo-European", + "language_name": "Basque", + "speakers": 1088519, + "family": null, "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -3723,8 +3723,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Sassarese Sardinian", - "speakers": 106085, + "language_name": "Hadothi", + "speakers": 1087394, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -3735,9 +3735,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Southern Kurdish", - "speakers": 3142162, - "family": "Indo-European", + "language_name": "Kabardian", + "speakers": 1070873, + "family": "Abkhaz-Adyge", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3747,9 +3747,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Northern Sami", - "speakers": 51530, - "family": "Uralic", + "language_name": "Khasi", + "speakers": 1060872, + "family": "Austroasiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3759,11 +3759,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Cebaara Senoufo", - "speakers": 1181687, - "family": "Atlantic-Congo", + "language_name": "Sardinian", + "speakers": 1060846, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3771,9 +3771,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Soninke", - "speakers": 1153651, - "family": "Mande", + "language_name": "Lozi", + "speakers": 1045596, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3783,9 +3783,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Southern Thai", - "speakers": 5518192, - "family": "Tai-Kadai", + "language_name": "Frafra", + "speakers": 1026907, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3795,11 +3795,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tiv", - "speakers": 3424448, - "family": "Atlantic-Congo", + "language_name": "Najdi Arabic", + "speakers": 1025205, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3807,11 +3807,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Silesian", - "speakers": 497670, - "family": "Indo-European", + "language_name": "Ronga", + "speakers": 1023339, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3819,9 +3819,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Eastern Tamang", - "speakers": 130410, - "family": "Sino-Tibetan", + "language_name": "Southern Luri", + "speakers": 1019080, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3831,9 +3831,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tagbanwa", - "speakers": 10045, - "family": "Austronesian", + "language_name": "Newari", + "speakers": 1000821, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3843,9 +3843,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tulu", - "speakers": 1989135, - "family": "Dravidian", + "language_name": "Talysh", + "speakers": 1000168, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3855,11 +3855,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tai Nüa", - "speakers": 264864, - "family": "Tai-Kadai", + "language_name": "Yiddish", + "speakers": 997214, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3867,9 +3867,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Tamang", - "speakers": 394263, - "family": "Sino-Tibetan", + "language_name": "Bena", + "speakers": 995398, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3879,9 +3879,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Thulung", - "speakers": 36393, - "family": "Sino-Tibetan", + "language_name": "Shambala", + "speakers": 995398, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3891,11 +3891,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Telugu", - "speakers": 95478480, - "family": "Dravidian", + "language_name": "Kachhi", + "speakers": 994568, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3903,9 +3903,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Timne", - "speakers": 1722482, - "family": "Atlantic-Congo", + "language_name": "Ngaju", + "speakers": 987996, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3915,9 +3915,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Teso", - "speakers": 2082973, - "family": "Nilotic", + "language_name": "Kita Maninkakan", + "speakers": 977670, + "family": "Mande", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3925,11 +3925,11 @@ "language_modeling_chrf": 0.0, "translation_bleu": 0.0, "translation_chrf": 0.0 - }, - { - "language_name": "Tetum", - "speakers": 816395, - "family": "Austronesian", + }, + { + "language_name": "Jumli", + "speakers": 970493, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3939,11 +3939,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tajik", - "speakers": 9644223, - "family": "Indo-European", + "language_name": "Central Okinawan", + "speakers": 966404, + "family": "Japonic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3951,11 +3951,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Thai", - "speakers": 55181920, - "family": "Tai-Kadai", + "language_name": "Mon", + "speakers": 966114, + "family": "Austroasiatic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3963,9 +3963,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Dangaura Tharu", - "speakers": 606558, - "family": "Indo-European", + "language_name": "Chimborazo Highland Quichua", + "speakers": 963579, + "family": "Quechuan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -3975,11 +3975,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kochila Tharu", - "speakers": 303279, - "family": "Indo-European", + "language_name": "Kachin", + "speakers": 962032, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -3987,8 +3987,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Rana Tharu", - "speakers": 363935, + "language_name": "Southern Hindko", + "speakers": 957354, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -3999,9 +3999,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tigrinya", - "speakers": 10145911, - "family": "Afro-Asiatic", + "language_name": "Limburgish", + "speakers": 950422, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -4011,9 +4011,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tigre", - "speakers": 1094616, - "family": "Afro-Asiatic", + "language_name": "Manyika", + "speakers": 945510, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4023,11 +4023,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tamil", - "speakers": 85616159, - "family": "Dravidian", + "language_name": "Chechen", + "speakers": 935365, + "family": "Nakh-Daghestanian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4035,9 +4035,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Syriac", - "speakers": 210659, - "family": "Afro-Asiatic", + "language_name": "Kuanyama", + "speakers": 920524, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4047,11 +4047,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Albanian", - "speakers": 6791906, - "family": "Indo-European", + "language_name": "South Ndebele", + "speakers": 903418, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4059,11 +4059,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Sylheti", - "speakers": 8132550, + "language_name": "Welsh", + "speakers": 884910, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4071,11 +4071,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Serbian", - "speakers": 15602410, - "family": "Indo-European", + "language_name": "Adangme", + "speakers": 880206, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4083,11 +4083,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Sranan Tongo", - "speakers": 414507, - "family": "Indo-European", + "language_name": "Estonian", + "speakers": 878449, + "family": "Uralic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4095,9 +4095,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Serer", - "speakers": 1731004, - "family": "Atlantic-Congo", + "language_name": "Parsi-Dari", + "speakers": 864342, + "family": "Bookkeeping", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4107,9 +4107,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sirmauri", - "speakers": 464132, - "family": "Indo-European", + "language_name": "Yucateco", + "speakers": 861955, + "family": "Mayan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4119,11 +4119,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Swati", - "speakers": 2212379, + "language_name": "Ewondo", + "speakers": 860095, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4131,9 +4131,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Saho", - "speakers": 218923, - "family": "Afro-Asiatic", + "language_name": "Komering", + "speakers": 854483, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4143,11 +4143,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Southern Sotho", - "speakers": 6390567, + "language_name": "Tooro", + "speakers": 821807, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4155,9 +4155,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Saterland Frisian", - "speakers": 962, - "family": "Indo-European", + "language_name": "Garo", + "speakers": 821563, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4167,11 +4167,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Sundanese", - "speakers": 32043120, - "family": "Austronesian", + "language_name": "Ga", + "speakers": 821526, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4179,8 +4179,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Sukuma", - "speakers": 5094094, + "language_name": "Mbunga", + "speakers": 819739, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -4191,9 +4191,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Susu", - "speakers": 1378014, - "family": "Mande", + "language_name": "Tetum", + "speakers": 816395, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4203,11 +4203,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Swedish", - "speakers": 12932871, - "family": "Indo-European", + "language_name": "Iban", + "speakers": 816302, + "family": "Bookkeeping", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4215,11 +4215,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Swahili", - "speakers": 171610296, + "language_name": "Embu", + "speakers": 802918, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4227,9 +4227,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Comorian", - "speakers": 170720, - "family": "Atlantic-Congo", + "language_name": "Swabian", + "speakers": 801597, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4239,9 +4239,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Swabian", - "speakers": 801597, - "family": "Indo-European", + "language_name": "Hmong Njua", + "speakers": 781687, + "family": "Hmong-Mien", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4251,9 +4251,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Shekhawati", - "speakers": 3713052, - "family": "Indo-European", + "language_name": "Kalanga", + "speakers": 770954, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4263,9 +4263,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sangir", - "speakers": 245664, - "family": "Austronesian", + "language_name": "Dotyali", + "speakers": 758198, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4275,9 +4275,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lakota", - "speakers": 8316, - "family": "Siouan", + "language_name": "Picard", + "speakers": 746330, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4287,9 +4287,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lillooet", - "speakers": 528, - "family": "Salishan", + "language_name": "Western Frisian", + "speakers": 743057, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4299,9 +4299,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lampung Api", - "speakers": 1842479, - "family": "Austronesian", + "language_name": "Chakma", + "speakers": 729137, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4311,9 +4311,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Cherokee", - "speakers": 25613, - "family": "Iroquoian", + "language_name": "Yao", + "speakers": 722357, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4323,8 +4323,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Cham", - "speakers": 270832, + "language_name": "Uab Meto", + "speakers": 720970, "family": "Austronesian", "average": 0.0, "in_benchmark": false, @@ -4335,8 +4335,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Eastern Cham", - "speakers": 87862, + "language_name": "Capiznon", + "speakers": 720595, "family": "Austronesian", "average": 0.0, "in_benchmark": false, @@ -4347,11 +4347,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Central Kurdish", - "speakers": 11086549, - "family": "Indo-European", + "language_name": "Asu", + "speakers": 702634, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4359,9 +4359,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chilcotin", - "speakers": 867, - "family": "Athabaskan-Eyak-Tlingit", + "language_name": "Tai Dam", + "speakers": 681177, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4371,8 +4371,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Corsican", - "speakers": 162836, + "language_name": "Walloon", + "speakers": 679801, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -4383,9 +4383,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Capiznon", - "speakers": 720595, - "family": "Austronesian", + "language_name": "Koyraboro Senni", + "speakers": 664816, + "family": "Songhay", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4395,9 +4395,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Cree", - "speakers": 9047, - "family": "Algic", + "language_name": "Bagheli", + "speakers": 654424, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4407,11 +4407,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Michif", - "speakers": 678, - "family": "Algic", + "language_name": "Asturian", + "speakers": 650205, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4419,11 +4419,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Crimean Tatar", - "speakers": 245968, - "family": "Turkic", + "language_name": "Laki", + "speakers": 645417, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4431,9 +4431,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Plains Cree", - "speakers": 4146, - "family": "Algic", + "language_name": "Munda", + "speakers": 636523, + "family": "Bookkeeping", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4443,9 +4443,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Northern East Cree", - "speakers": 377, - "family": "Algic", + "language_name": "Lisu", + "speakers": 627309, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4455,11 +4455,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Seselwa Creole French", - "speakers": 94061, - "family": "Indo-European", + "language_name": "N’Ko", + "speakers": 626370, + "family": "Artificial Language", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4467,11 +4467,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Czech", - "speakers": 13045532, + "language_name": "Gronings", + "speakers": 622094, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4479,9 +4479,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kashubian", - "speakers": 49767, - "family": "Indo-European", + "language_name": "Mongo", + "speakers": 620858, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4491,9 +4491,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Swampy Cree", - "speakers": 1809, - "family": "Algic", + "language_name": "Dangaura Tharu", + "speakers": 606558, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4503,8 +4503,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Church Slavic", - "speakers": 0, + "language_name": "Neapolitan", + "speakers": 605306, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -4515,9 +4515,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chuvash", - "speakers": 1842386, - "family": "Turkic", + "language_name": "Nuer", + "speakers": 591427, + "family": "Nilotic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -4527,9 +4527,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chickasaw", - "speakers": 0, - "family": "Muskogean", + "language_name": "Breton", + "speakers": 563140, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4539,9 +4539,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chipewyan", - "speakers": 12816, - "family": "Athabaskan-Eyak-Tlingit", + "language_name": "Pijin", + "speakers": 561780, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4551,11 +4551,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Danish", - "speakers": 7072056, + "language_name": "Réunion Creole French", + "speakers": 559185, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4563,9 +4563,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Choctaw", - "speakers": 10977, - "family": "Muskogean", + "language_name": "Avaric", + "speakers": 552716, + "family": "Nakh-Daghestanian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4575,9 +4575,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bushi", - "speakers": 44620, - "family": "Austronesian", + "language_name": "Ndonga", + "speakers": 552315, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4587,11 +4587,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Buginese", - "speakers": 4298211, - "family": "Austronesian", + "language_name": "Ossetic", + "speakers": 541444, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4599,9 +4599,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bulu", - "speakers": 1276270, - "family": "Atlantic-Congo", + "language_name": "Udmurt", + "speakers": 538544, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4611,11 +4611,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bube", - "speakers": 66058, - "family": "Atlantic-Congo", + "language_name": "Ligurian", + "speakers": 536663, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4623,11 +4623,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Blin", - "speakers": 79056, - "family": "Afro-Asiatic", + "language_name": "Kabuverdianu", + "speakers": 530762, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4635,9 +4635,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Medumba", - "speakers": 305195, - "family": "Atlantic-Congo", + "language_name": "Rusyn", + "speakers": 527075, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4647,11 +4647,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Jenaama Bozo", - "speakers": 166204, - "family": "Mande", + "language_name": "Mari", + "speakers": 524371, + "family": "Uralic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4659,8 +4659,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Catalan", - "speakers": 8679139, + "language_name": "Sicilian", + "speakers": 511702, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -4671,9 +4671,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Caddo", - "speakers": 0, - "family": "Caddoan", + "language_name": "Langi", + "speakers": 509409, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4683,9 +4683,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Atsam", - "speakers": 44946, - "family": "Atlantic-Congo", + "language_name": "Balti", + "speakers": 502520, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4695,9 +4695,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chakma", - "speakers": 729137, - "family": "Indo-European", + "language_name": "Eastern Huasteca Nahuatl", + "speakers": 501735, + "family": "Uto-Aztecan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4707,9 +4707,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chechen", - "speakers": 935365, - "family": "Nakh-Daghestanian", + "language_name": "Western Huasteca Nahuatl", + "speakers": 501735, + "family": "Uto-Aztecan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4719,9 +4719,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Cebuano", - "speakers": 26203440, - "family": "Austronesian", + "language_name": "Silesian", + "speakers": 497670, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -4731,9 +4731,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chiga", - "speakers": 2335662, - "family": "Atlantic-Congo", + "language_name": "Kara-Kalpak", + "speakers": 489046, + "family": "Turkic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4743,9 +4743,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chamorro", - "speakers": 46325, - "family": "Austronesian", + "language_name": "Gujari", + "speakers": 467002, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4755,9 +4755,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Chuukese", - "speakers": 30731, - "family": "Austronesian", + "language_name": "Sirmauri", + "speakers": 464132, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4767,9 +4767,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mari", - "speakers": 524371, - "family": "Uralic", + "language_name": "Maltese", + "speakers": 457267, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -4779,11 +4779,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Welsh", - "speakers": 884910, - "family": "Indo-European", + "language_name": "Bantawa", + "speakers": 454918, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4791,9 +4791,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Dakota", - "speakers": 20832, - "family": "Siouan", + "language_name": "Yakut", + "speakers": 453510, + "family": "Turkic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4803,9 +4803,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bateri", - "speakers": 78843, - "family": "Indo-European", + "language_name": "Masalit", + "speakers": 451060, + "family": "Maban", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4815,8 +4815,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Efik", - "speakers": 2996392, + "language_name": "Jju", + "speakers": 449459, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -4827,11 +4827,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Greek", - "speakers": 12292242, - "family": "Indo-European", + "language_name": "Adyghe", + "speakers": 444583, + "family": "Abkhaz-Adyge", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4839,11 +4839,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Esperanto", - "speakers": 301, - "family": "Artificial Language", + "language_name": "Yemba", + "speakers": 443920, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4851,9 +4851,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Central Yupik", - "speakers": 20956, - "family": "Eskimo-Aleut", + "language_name": "Mingrelian", + "speakers": 439670, + "family": "Kartvelian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4863,32 +4863,20 @@ "translation_chrf": 0.0 }, { - "language_name": "Estonian", - "speakers": 878449, + "language_name": "Erzya", + "speakers": 439338, "family": "Uralic", "average": 0.0, - "in_benchmark": true, - "NaN": 0.0, - "classification_accuracy": 0.0, - "language_modeling_chrf": 0.0, - "translation_bleu": 0.0, - "translation_chrf": 0.0 - }, - { - "language_name": "Basque", - "speakers": 1088519, - "family": null, - "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, "translation_bleu": 0.0, "translation_chrf": 0.0 }, - { - "language_name": "Ewondo", - "speakers": 860095, + { + "language_name": "Taita", + "speakers": 438929, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -4899,9 +4887,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Extremaduran", - "speakers": 245077, - "family": "Indo-European", + "language_name": "Central Mazahua", + "speakers": 437410, + "family": "Otomanguean", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4911,11 +4899,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Persian", - "speakers": 84710459, - "family": "Indo-European", + "language_name": "Vunjo", + "speakers": 433291, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4923,8 +4911,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Fang", - "speakers": 426451, + "language_name": "Rombo", + "speakers": 433291, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -4935,9 +4923,9 @@ "translation_chrf": 0.0 }, { - "language_name": "West Albay Bikol", - "speakers": 2511163, - "family": "Austronesian", + "language_name": "Machame", + "speakers": 433291, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4947,9 +4935,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Fula", - "speakers": 7788904, - "family": "Atlantic-Congo", + "language_name": "Khmu", + "speakers": 431949, + "family": "Austroasiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4959,8 +4947,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Maasina Fulfulde", - "speakers": 1505612, + "language_name": "Fang", + "speakers": 426451, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -4971,11 +4959,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Finnish", - "speakers": 5736842, - "family": "Uralic", + "language_name": "Kinaray-a", + "speakers": 425806, + "family": "Austronesian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -4983,9 +4971,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Nobiin", - "speakers": 378161, - "family": "Nubian", + "language_name": "Kuy", + "speakers": 421207, + "family": "Austroasiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -4995,9 +4983,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Filipino", - "speakers": 67471096, - "family": "Austronesian", + "language_name": "Luxembourgish", + "speakers": 421015, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -5007,9 +4995,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tornedalen Finnish", - "speakers": 56114, - "family": "Uralic", + "language_name": "Sranan Tongo", + "speakers": 414507, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5019,11 +5007,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Fijian", - "speakers": 365030, - "family": "Austronesian", + "language_name": "Jola-Fonyi", + "speakers": 409146, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5031,9 +5019,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Emilian", - "speakers": 31201, - "family": "Indo-European", + "language_name": "Western Tamang", + "speakers": 394263, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5043,11 +5031,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ewe", - "speakers": 4690857, - "family": "Atlantic-Congo", + "language_name": "Pontic", + "speakers": 392463, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5055,9 +5043,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Dargwa", - "speakers": 368477, - "family": "Nakh-Daghestanian", + "language_name": "Negeri Sembilan Malay", + "speakers": 391825, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5067,8 +5055,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Embu", - "speakers": 802918, + "language_name": "Ngiemboon", + "speakers": 388430, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -5079,8 +5067,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Taita", - "speakers": 438929, + "language_name": "Ghomala", + "speakers": 388430, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -5091,8 +5079,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Deccan", - "speakers": 13128291, + "language_name": "Divehi", + "speakers": 388044, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -5103,11 +5091,11 @@ "translation_chrf": 0.0 }, { - "language_name": "German", - "speakers": 136350226, + "language_name": "Saurashtra", + "speakers": 384566, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5115,9 +5103,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Slave", - "speakers": 2299, - "family": "Athabaskan-Eyak-Tlingit", + "language_name": "Fiji Hindi", + "speakers": 383749, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5127,9 +5115,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Dogrib", - "speakers": 2111, - "family": "Athabaskan-Eyak-Tlingit", + "language_name": "Nobiin", + "speakers": 378161, + "family": "Nubian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5139,9 +5127,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Zarma", - "speakers": 3871308, - "family": "Songhay", + "language_name": "Balkan Gagauz Turkish", + "speakers": 377280, + "family": "Turkic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5151,9 +5139,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Dan", - "speakers": 1099244, - "family": "Mande", + "language_name": "Kerinci", + "speakers": 373836, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5163,11 +5151,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Dogri", - "speakers": 2652180, + "language_name": "Parkari Koli", + "speakers": 373602, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5175,11 +5163,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Lower Sorbian", - "speakers": 6974, - "family": "Indo-European", + "language_name": "Dzongkha", + "speakers": 370341, + "family": "Bookkeeping", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5187,9 +5175,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tomo Kan Dogon", - "speakers": 215087, - "family": "Dogon", + "language_name": "Pökoot", + "speakers": 369343, + "family": "Nilotic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5199,9 +5187,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Central Dusun", - "speakers": 182852, - "family": "Austronesian", + "language_name": "Dargwa", + "speakers": 368477, + "family": "Nakh-Daghestanian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5211,9 +5199,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Dotyali", - "speakers": 758198, - "family": "Indo-European", + "language_name": "Limbu", + "speakers": 368085, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5223,11 +5211,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Duala", - "speakers": 133176, - "family": "Atlantic-Congo", + "language_name": "Fijian", + "speakers": 365030, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5235,8 +5223,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Divehi", - "speakers": 388044, + "language_name": "Rana Tharu", + "speakers": 363935, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -5247,8 +5235,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Jola-Fonyi", - "speakers": 409146, + "language_name": "Kom", + "speakers": 360685, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -5259,9 +5247,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Dyula", - "speakers": 6667328, - "family": "Mande", + "language_name": "Icelandic", + "speakers": 350734, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -5271,11 +5259,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Dzongkha", - "speakers": 370341, - "family": "Bookkeeping", + "language_name": "Khowar", + "speakers": 350252, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5283,9 +5271,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Buriat", - "speakers": 311788, - "family": "Mongolic-Khitan", + "language_name": "Tolaki", + "speakers": 347134, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5295,9 +5283,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Rinconada Bikol", - "speakers": 305707, - "family": "Austronesian", + "language_name": "Naxi", + "speakers": 334565, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5307,11 +5295,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Fon", - "speakers": 3216150, - "family": "Atlantic-Congo", + "language_name": "Eastern Magar", + "speakers": 333607, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5319,9 +5307,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Algerian Arabic", - "speakers": 35667507, - "family": "Afro-Asiatic", + "language_name": "Bamun", + "speakers": 332940, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5331,11 +5319,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Moroccan Arabic", - "speakers": 30938679, - "family": "Afro-Asiatic", + "language_name": "Basaa", + "speakers": 332940, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5343,11 +5331,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Egyptian Arabic", - "speakers": 66639360, - "family": "Afro-Asiatic", + "language_name": "Koyra Chiini", + "speakers": 332408, + "family": "Songhay", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5355,11 +5343,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Assamese", - "speakers": 17239170, + "language_name": "Indus Kohistani", + "speakers": 326901, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5367,8 +5355,20 @@ "translation_chrf": 0.0 }, { - "language_name": "Asu", - "speakers": 702634, + "language_name": "Gayo", + "speakers": 320431, + "family": "Austronesian", + "average": 0.0, + "in_benchmark": false, + "NaN": 0.0, + "classification_accuracy": 0.0, + "language_modeling_chrf": 0.0, + "translation_bleu": 0.0, + "translation_chrf": 0.0 + }, + { + "language_name": "Ngazidja Comorian", + "speakers": 313124, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -5379,11 +5379,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Asturian", - "speakers": 650205, - "family": "Indo-European", + "language_name": "Buriat", + "speakers": 311788, + "family": "Mongolic-Khitan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5391,9 +5391,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Atikamekw", - "speakers": 6408, - "family": "Algic", + "language_name": "Rinconada Bikol", + "speakers": 305707, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5403,9 +5403,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Avaric", - "speakers": 552716, - "family": "Nakh-Daghestanian", + "language_name": "Medumba", + "speakers": 305195, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5415,11 +5415,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Awadhi", - "speakers": 25862924, - "family": "Indo-European", + "language_name": "Ao Naga", + "speakers": 305001, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5427,11 +5427,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Aymara", - "speakers": 2838620, - "family": "Aymaran", + "language_name": "Badaga", + "speakers": 305001, + "family": "Dravidian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5439,11 +5439,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Azerbaijani", - "speakers": 32446682, - "family": "Turkic", + "language_name": "Kochila Tharu", + "speakers": 303279, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5451,11 +5451,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bashkir", - "speakers": 1842386, - "family": "Turkic", + "language_name": "Moksha", + "speakers": 297616, + "family": "Uralic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5463,9 +5463,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Baluchi", - "speakers": 8227887, - "family": "Indo-European", + "language_name": "Tae'", + "speakers": 293729, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5475,11 +5475,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Balinese", - "speakers": 4806468, - "family": "Austronesian", + "language_name": "Nzima", + "speakers": 293402, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5487,9 +5487,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bantawa", - "speakers": 454918, - "family": "Sino-Tibetan", + "language_name": "Nama", + "speakers": 289308, + "family": "Khoe-Kwadi", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5499,9 +5499,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bavarian", - "speakers": 22043627, - "family": "Indo-European", + "language_name": "Ndzwani Comorian", + "speakers": 287736, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5511,9 +5511,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Basaa", - "speakers": 332940, - "family": "Atlantic-Congo", + "language_name": "Kumyk", + "speakers": 283444, + "family": "Turkic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5523,8 +5523,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Bamun", - "speakers": 332940, + "language_name": "Mundang", + "speakers": 277450, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -5535,11 +5535,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Najdi Arabic", - "speakers": 1025205, - "family": "Afro-Asiatic", + "language_name": "Mapuche", + "speakers": 272802, + "family": "Araucanian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5547,9 +5547,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Araona", - "speakers": 105, - "family": "Pano-Tacanan", + "language_name": "Western Cham", + "speakers": 270832, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5559,9 +5559,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ghomala", - "speakers": 388430, - "family": "Atlantic-Congo", + "language_name": "Bislama", + "speakers": 268500, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5571,9 +5571,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Mapuche", - "speakers": 272802, - "family": "Araucanian", + "language_name": "Lü", + "speakers": 264864, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5583,9 +5583,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Abron", - "speakers": 1467010, - "family": "Atlantic-Congo", + "language_name": "Tai Nüa", + "speakers": 264864, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5595,11 +5595,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Achinese", - "speakers": 3738364, - "family": "Austronesian", + "language_name": "Wadiyara Koli", + "speakers": 256851, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5607,9 +5607,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Acoli", - "speakers": 1600361, - "family": "Nilotic", + "language_name": "Kachi Koli", + "speakers": 256851, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5619,9 +5619,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Adangme", - "speakers": 880206, - "family": "Atlantic-Congo", + "language_name": "Lezghian", + "speakers": 255100, + "family": "Nakh-Daghestanian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5631,9 +5631,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Adyghe", - "speakers": 444583, - "family": "Abkhaz-Adyge", + "language_name": "Komi", + "speakers": 255100, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5643,9 +5643,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tunisian Arabic", - "speakers": 10549080, - "family": "Afro-Asiatic", + "language_name": "Samoan", + "speakers": 252717, + "family": "Austronesian", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -5655,11 +5655,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Afrikaans", - "speakers": 9318845, - "family": "Indo-European", + "language_name": "Western Magar", + "speakers": 251722, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5667,9 +5667,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Aghem", - "speakers": 38843, - "family": "Atlantic-Congo", + "language_name": "Samburu", + "speakers": 246228, + "family": "Nilotic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5679,11 +5679,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Akan", - "speakers": 11442678, - "family": "Atlantic-Congo", + "language_name": "Crimean Tatar", + "speakers": 245968, + "family": "Turkic", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5691,9 +5691,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gheg Albanian", - "speakers": 1430250, - "family": "Indo-European", + "language_name": "Mandar", + "speakers": 245664, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5703,9 +5703,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Southern Altai", - "speakers": 19841, - "family": "Turkic", + "language_name": "Sangir", + "speakers": 245664, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5715,11 +5715,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Amharic", - "speakers": 35728475, - "family": "Afro-Asiatic", + "language_name": "Extremaduran", + "speakers": 245077, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5727,9 +5727,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Amo", - "speakers": 18620, - "family": "Atlantic-Congo", + "language_name": "Central Huasteca Nahuatl", + "speakers": 244435, + "family": "Uto-Aztecan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5739,8 +5739,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Aragonese", - "speakers": 26008, + "language_name": "Zeelandic", + "speakers": 241926, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -5751,9 +5751,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Obolo", - "speakers": 0, - "family": "Atlantic-Congo", + "language_name": "Karachay-Balkar", + "speakers": 240927, + "family": "Turkic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5763,9 +5763,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Uab Meto", - "speakers": 720970, - "family": "Austronesian", + "language_name": "Colognian", + "speakers": 240479, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5775,9 +5775,9 @@ "translation_chrf": 0.0 }, { - "language_name": "North Levantine Arabic", - "speakers": 39031474, - "family": "Afro-Asiatic", + "language_name": "Herero", + "speakers": 239336, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5787,9 +5787,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Batak Toba", - "speakers": 2456639, - "family": "Austronesian", + "language_name": "Saafi-Saafi", + "speakers": 236046, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5799,9 +5799,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Baoulé", - "speakers": 3022921, - "family": "Atlantic-Congo", + "language_name": "Zaghawa", + "speakers": 232364, + "family": "Saharan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5811,9 +5811,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Akoose", - "speakers": 149823, - "family": "Atlantic-Congo", + "language_name": "Ingush", + "speakers": 226755, + "family": "Nakh-Daghestanian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5823,11 +5823,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Banjar", - "speakers": 4010288, - "family": "Austronesian", + "language_name": "Saho", + "speakers": 218923, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5835,9 +5835,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kom", - "speakers": 360685, - "family": "Atlantic-Congo", + "language_name": "Tomo Kan Dogon", + "speakers": 215087, + "family": "Dogon", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5847,11 +5847,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Buhid", - "speakers": 7970, - "family": "Austronesian", + "language_name": "Papiamento", + "speakers": 211640, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5859,9 +5859,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Siksiká", - "speakers": 4900, - "family": "Algic", + "language_name": "Syriac", + "speakers": 210659, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5871,8 +5871,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Anii", - "speakers": 51507, + "language_name": "Nyasa Tonga", + "speakers": 207727, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -5883,9 +5883,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tai Dam", - "speakers": 681177, - "family": "Tai-Kadai", + "language_name": "Mafa", + "speakers": 205313, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5895,11 +5895,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bambara", - "speakers": 9385632, - "family": "Mande", + "language_name": "Punu", + "speakers": 200782, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5907,8 +5907,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Bomu", - "speakers": 168159, + "language_name": "Tyap", + "speakers": 199046, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -5919,11 +5919,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Tibetan", - "speakers": 3006697, - "family": "Sino-Tibetan", + "language_name": "Xaasongaxango", + "speakers": 195534, + "family": "Mande", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -5931,9 +5931,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bishnupriya", - "speakers": 90174, - "family": "Indo-European", + "language_name": "Tuvinian", + "speakers": 184239, + "family": "Turkic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5943,9 +5943,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bakhtiari", - "speakers": 1188926, - "family": "Indo-European", + "language_name": "Central Dusun", + "speakers": 182852, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5955,9 +5955,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Koro Wachi", - "speakers": 46718, - "family": "Atlantic-Congo", + "language_name": "Riang (India)", + "speakers": 172392, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5967,9 +5967,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Breton", - "speakers": 563140, - "family": "Indo-European", + "language_name": "Comorian", + "speakers": 170720, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5979,9 +5979,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Braj", - "speakers": 54370, - "family": "Indo-European", + "language_name": "Bomu", + "speakers": 168159, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -5991,11 +5991,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Brahui", - "speakers": 3035513, - "family": "Dravidian", + "language_name": "Latgalian", + "speakers": 167429, + "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6003,11 +6003,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bodo", - "speakers": 1856526, - "family": "Sino-Tibetan", + "language_name": "Navajo", + "speakers": 166320, + "family": "Athabaskan-Eyak-Tlingit", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6015,11 +6015,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bosnian", - "speakers": 7594468, - "family": "Indo-European", + "language_name": "Jenaama Bozo", + "speakers": 166204, + "family": "Mande", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6027,9 +6027,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bassari", - "speakers": 15264, - "family": "Atlantic-Congo", + "language_name": "Corsican", + "speakers": 162836, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6039,8 +6039,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Balanta-Ganja", - "speakers": 95992, + "language_name": "Bafut", + "speakers": 158146, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -6051,9 +6051,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kanauji", - "speakers": 7426104, - "family": "Indo-European", + "language_name": "Sherpa", + "speakers": 157705, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6063,11 +6063,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Belarusian", - "speakers": 10064517, + "language_name": "Kirmanjki", + "speakers": 155833, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6075,9 +6075,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bini", - "speakers": 1519599, - "family": "Atlantic-Congo", + "language_name": "Hiri Motu", + "speakers": 152449, + "family": "Pidgin", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6087,9 +6087,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Beja", - "speakers": 2460326, - "family": "Afro-Asiatic", + "language_name": "Kako", + "speakers": 149823, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6099,11 +6099,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bemba", - "speakers": 5402246, + "language_name": "Akoose", + "speakers": 149823, "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6111,8 +6111,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Betawi", - "speakers": 5607546, + "language_name": "Selayar", + "speakers": 144194, "family": "Austronesian", "average": 0.0, "in_benchmark": false, @@ -6123,11 +6123,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bena", - "speakers": 995398, - "family": "Atlantic-Congo", + "language_name": "Māori", + "speakers": 137913, + "family": "Austronesian", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6135,9 +6135,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bafut", - "speakers": 158146, - "family": "Atlantic-Congo", + "language_name": "Rajbanshi", + "speakers": 133443, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6147,9 +6147,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Badaga", - "speakers": 305001, - "family": "Dravidian", + "language_name": "Duala", + "speakers": 133176, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6159,9 +6159,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Balti", - "speakers": 502520, - "family": "Sino-Tibetan", + "language_name": "Wayuu", + "speakers": 132529, + "family": "Arawakan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6171,9 +6171,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bagheli", - "speakers": 654424, - "family": "Indo-European", + "language_name": "Vai", + "speakers": 131906, + "family": "Mande", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6183,11 +6183,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bulgarian", - "speakers": 7878315, - "family": "Indo-European", + "language_name": "Eastern Tamang", + "speakers": 130410, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6195,9 +6195,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Haryanvi", - "speakers": 15913080, - "family": "Indo-European", + "language_name": "Metaʼ", + "speakers": 130401, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6207,8 +6207,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Balochi", - "speakers": 2037382, + "language_name": "Pennsylvania German", + "speakers": 129729, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -6219,9 +6219,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Balkan Gagauz Turkish", - "speakers": 377280, - "family": "Turkic", + "language_name": "Rwa", + "speakers": 128816, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6231,8 +6231,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Bhili", - "speakers": 1591308, + "language_name": "Torwali", + "speakers": 123756, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -6243,9 +6243,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bhilali", - "speakers": 1220003, - "family": "Indo-European", + "language_name": "Mandjak", + "speakers": 121170, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6255,11 +6255,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Bhojpuri", - "speakers": 32934797, - "family": "Indo-European", + "language_name": "Tshangla", + "speakers": 117348, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6267,9 +6267,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bislama", - "speakers": 268500, - "family": "Indo-European", + "language_name": "Sangu", + "speakers": 117106, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6279,9 +6279,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bikol", - "speakers": 3275430, - "family": "Austronesian", + "language_name": "Karelian", + "speakers": 116212, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6291,11 +6291,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Faroese", - "speakers": 71351, + "language_name": "Ladino", + "speakers": 112781, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6303,9 +6303,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Cajun French", - "speakers": 27942, - "family": "Indo-European", + "language_name": "Ifè", + "speakers": 111910, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6315,9 +6315,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lisu", - "speakers": 627309, - "family": "Sino-Tibetan", + "language_name": "Gagauz", + "speakers": 111028, + "family": "Turkic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6327,9 +6327,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Komering", - "speakers": 854483, - "family": "Austronesian", + "language_name": "Lak", + "speakers": 110543, + "family": "Nakh-Daghestanian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6339,9 +6339,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Khasi", - "speakers": 1060872, - "family": "Austroasiatic", + "language_name": "Sassarese Sardinian", + "speakers": 106085, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6351,9 +6351,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lü", - "speakers": 264864, - "family": "Tai-Kadai", + "language_name": "Tongan", + "speakers": 100790, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6363,9 +6363,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Khandesi", - "speakers": 1989135, - "family": "Indo-European", + "language_name": "Balanta-Ganja", + "speakers": 95992, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6375,9 +6375,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Koyra Chiini", - "speakers": 332408, - "family": "Songhay", + "language_name": "Ngomba", + "speakers": 94333, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6387,9 +6387,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Khamti", - "speakers": 13527, - "family": "Tai-Kadai", + "language_name": "Seselwa Creole French", + "speakers": 94061, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6399,9 +6399,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Khowar", - "speakers": 350252, - "family": "Indo-European", + "language_name": "Abkhazian", + "speakers": 91953, + "family": "Abkhaz-Adyge", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6411,11 +6411,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kikuyu", - "speakers": 9099743, - "family": "Atlantic-Congo", + "language_name": "Tahitian", + "speakers": 91488, + "family": "Austronesian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6423,9 +6423,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kirmanjki", - "speakers": 155833, - "family": "Indo-European", + "language_name": "Inuktitut", + "speakers": 90466, + "family": "Eskimo-Aleut", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6435,9 +6435,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kuanyama", - "speakers": 920524, - "family": "Atlantic-Congo", + "language_name": "Plautdietsch", + "speakers": 90466, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6447,9 +6447,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Khmu", - "speakers": 431949, - "family": "Austroasiatic", + "language_name": "Bishnupriya", + "speakers": 90174, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6459,11 +6459,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kazakh", - "speakers": 13637392, - "family": "Turkic", + "language_name": "Bafia", + "speakers": 88784, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6471,9 +6471,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kako", - "speakers": 149823, - "family": "Atlantic-Congo", + "language_name": "Gurung", + "speakers": 87951, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6483,9 +6483,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kalaallisut", - "speakers": 55440, - "family": "Eskimo-Aleut", + "language_name": "Eastern Cham", + "speakers": 87862, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6495,9 +6495,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kalenjin", - "speakers": 4068120, - "family": "Nilotic", + "language_name": "Western Lawa", + "speakers": 87751, + "family": "Austroasiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6507,11 +6507,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Khmer", - "speakers": 15065030, - "family": "Austroasiatic", + "language_name": "Mankanya", + "speakers": 83151, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6519,11 +6519,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kimbundu", - "speakers": 8130575, - "family": "Atlantic-Congo", + "language_name": "Lepcha", + "speakers": 79743, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6531,11 +6531,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kannada", - "speakers": 49065330, - "family": "Dravidian", + "language_name": "Blin", + "speakers": 79056, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6543,9 +6543,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kaingang", - "speakers": 50812, - "family": "Nuclear-Macro-Je", + "language_name": "Bateri", + "speakers": 78843, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6555,9 +6555,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kongo", - "speakers": 1526700, - "family": "Atlantic-Congo", + "language_name": "Kathoriya Tharu", + "speakers": 72787, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6567,9 +6567,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Korean", - "speakers": 78357046, - "family": "Koreanic", + "language_name": "Scottish Gaelic", + "speakers": 72337, + "family": "Indo-European", "average": 0.0, "in_benchmark": true, "NaN": 0.0, @@ -6579,11 +6579,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kumaoni", - "speakers": 2917398, + "language_name": "Faroese", + "speakers": 71351, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6591,11 +6591,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Javanese", - "speakers": 91180665, - "family": "Austronesian", + "language_name": "Võro", + "speakers": 70031, + "family": "Uralic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6603,11 +6603,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Georgian", - "speakers": 3543646, - "family": "Kartvelian", + "language_name": "Kenyang", + "speakers": 69362, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6615,9 +6615,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kara-Kalpak", - "speakers": 489046, - "family": "Turkic", + "language_name": "Gilbertese", + "speakers": 67078, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6627,11 +6627,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kabyle", - "speakers": 3351886, - "family": "Afro-Asiatic", + "language_name": "Bube", + "speakers": 66058, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6639,11 +6639,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kachin", - "speakers": 962032, - "family": "Sino-Tibetan", + "language_name": "Mentawai", + "speakers": 64086, + "family": "Austronesian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6651,9 +6651,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Jju", - "speakers": 449459, - "family": "Atlantic-Congo", + "language_name": "Arpitan", + "speakers": 63777, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6663,11 +6663,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kamba", - "speakers": 4068120, - "family": "Atlantic-Congo", + "language_name": "Komi-Permyak", + "speakers": 63775, + "family": "Uralic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6675,8 +6675,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Xaasongaxango", - "speakers": 195534, + "language_name": "Koro", + "speakers": 63207, "family": "Mande", "average": 0.0, "in_benchmark": false, @@ -6687,9 +6687,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kabardian", - "speakers": 1070873, - "family": "Abkhaz-Adyge", + "language_name": "Marshallese", + "speakers": 56879, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6699,9 +6699,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tyap", - "speakers": 199046, - "family": "Atlantic-Congo", + "language_name": "Tornedalen Finnish", + "speakers": 56114, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6711,9 +6711,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kalanga", - "speakers": 770954, - "family": "Atlantic-Congo", + "language_name": "Kalaallisut", + "speakers": 55440, + "family": "Eskimo-Aleut", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6723,9 +6723,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Makonde", - "speakers": 1463820, - "family": "Atlantic-Congo", + "language_name": "Braj", + "speakers": 54370, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6735,9 +6735,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kuy", - "speakers": 421207, - "family": "Austroasiatic", + "language_name": "Guianese Creole French", + "speakers": 51872, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6747,11 +6747,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kabuverdianu", - "speakers": 530762, - "family": "Indo-European", + "language_name": "Northern Sami", + "speakers": 51530, + "family": "Uralic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6759,8 +6759,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Kenyang", - "speakers": 69362, + "language_name": "Anii", + "speakers": 51507, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -6771,9 +6771,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Koro", - "speakers": 63207, - "family": "Mande", + "language_name": "Kaingang", + "speakers": 50812, + "family": "Nuclear-Macro-Je", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6783,8 +6783,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Kachhi", - "speakers": 994568, + "language_name": "Kashubian", + "speakers": 49767, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -6795,8 +6795,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Mankanya", - "speakers": 83151, + "language_name": "Koro Wachi", + "speakers": 46718, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -6807,9 +6807,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Komi-Permyak", - "speakers": 63775, - "family": "Uralic", + "language_name": "Chamorro", + "speakers": 46325, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6817,11 +6817,11 @@ "language_modeling_chrf": 0.0, "translation_bleu": 0.0, "translation_chrf": 0.0 - }, - { - "language_name": "Jumli", - "speakers": 970493, - "family": "Indo-European", + }, + { + "language_name": "Atsam", + "speakers": 44946, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6831,9 +6831,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Wadiyara Koli", - "speakers": 256851, - "family": "Indo-European", + "language_name": "Bushi", + "speakers": 44620, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6843,11 +6843,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kyrgyz", - "speakers": 3338267, - "family": "Turkic", + "language_name": "Romansh", + "speakers": 42020, + "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6855,9 +6855,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Latin", - "speakers": 820, - "family": "Indo-European", + "language_name": "Aghem", + "speakers": 38843, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6867,9 +6867,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ladino", - "speakers": 112781, - "family": "Indo-European", + "language_name": "Kuvi", + "speakers": 38457, + "family": "Dravidian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6879,8 +6879,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Langi", - "speakers": 509409, + "language_name": "Noon", + "speakers": 37767, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -6891,11 +6891,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Panjabi", - "speakers": 93433552, + "language_name": "Friulian", + "speakers": 37442, "family": "Indo-European", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6903,9 +6903,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lango (Uganda)", - "speakers": 1643614, - "family": "Nilotic", + "language_name": "Thulung", + "speakers": 36393, + "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6915,11 +6915,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Luxembourgish", - "speakers": 421015, + "language_name": "Emilian", + "speakers": 31201, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6927,9 +6927,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lak", - "speakers": 110543, - "family": "Nakh-Daghestanian", + "language_name": "Chuukese", + "speakers": 30731, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6939,9 +6939,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Tolaki", - "speakers": 347134, - "family": "Austronesian", + "language_name": "Western Mari", + "speakers": 29762, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6951,9 +6951,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Lawa", - "speakers": 87751, - "family": "Austroasiatic", + "language_name": "Hawaiian", + "speakers": 29605, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6963,8 +6963,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Lepcha", - "speakers": 79743, + "language_name": "Mru", + "speakers": 29277, "family": "Sino-Tibetan", "average": 0.0, "in_benchmark": false, @@ -6975,9 +6975,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lezghian", - "speakers": 255100, - "family": "Nakh-Daghestanian", + "language_name": "Cajun French", + "speakers": 27942, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -6987,11 +6987,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ganda", - "speakers": 5622890, - "family": "Atlantic-Congo", + "language_name": "Nheengatu", + "speakers": 26171, + "family": "Tupian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -6999,11 +6999,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Limburgish", - "speakers": 950422, + "language_name": "Aragonese", + "speakers": 26008, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7011,9 +7011,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Limbu", - "speakers": 368085, - "family": "Sino-Tibetan", + "language_name": "Cherokee", + "speakers": 25613, + "family": "Iroquoian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7023,11 +7023,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Ligurian", - "speakers": 536663, + "language_name": "Sinte Romani", + "speakers": 24372, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7035,9 +7035,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Abkhazian", - "speakers": 91953, - "family": "Abkhaz-Adyge", + "language_name": "Ojibwa", + "speakers": 23747, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7047,9 +7047,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kuvi", - "speakers": 38457, - "family": "Dravidian", + "language_name": "Pohnpeian", + "speakers": 23560, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7059,9 +7059,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Northern Khmer", - "speakers": 1172616, - "family": "Austroasiatic", + "language_name": "Laz", + "speakers": 22965, + "family": "Kartvelian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7071,8 +7071,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Konkani", - "speakers": 4906533, + "language_name": "Muslim Tat", + "speakers": 22453, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -7083,9 +7083,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kwakʼwala", - "speakers": 377, - "family": "Wakashan", + "language_name": "Central Yupik", + "speakers": 20956, + "family": "Eskimo-Aleut", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7095,9 +7095,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kosraean", - "speakers": 7990, - "family": "Austronesian", + "language_name": "Dakota", + "speakers": 20832, + "family": "Siouan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7107,9 +7107,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kpelle", - "speakers": 1186303, - "family": "Mande", + "language_name": "Southern Altai", + "speakers": 19841, + "family": "Turkic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7119,9 +7119,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Karachay-Balkar", - "speakers": 240927, - "family": "Turkic", + "language_name": "Amo", + "speakers": 18620, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7131,9 +7131,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Krio", - "speakers": 6293684, - "family": "Indo-European", + "language_name": "Guajajára", + "speakers": 17784, + "family": "Tupian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7143,8 +7143,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Kinaray-a", - "speakers": 425806, + "language_name": "Hanunoo", + "speakers": 17469, "family": "Austronesian", "average": 0.0, "in_benchmark": false, @@ -7155,9 +7155,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Karelian", - "speakers": 116212, - "family": "Uralic", + "language_name": "Tsakhur", + "speakers": 16329, + "family": "Nakh-Daghestanian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7167,9 +7167,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kurukh", - "speakers": 2519571, - "family": "Dravidian", + "language_name": "Palauan", + "speakers": 16047, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7179,8 +7179,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Kashmiri", - "speakers": 5598085, + "language_name": "Sanskrit", + "speakers": 15913, "family": "Indo-European", "average": 0.0, "in_benchmark": true, @@ -7191,8 +7191,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Shambala", - "speakers": 995398, + "language_name": "Bassari", + "speakers": 15264, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -7203,9 +7203,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Bafia", - "speakers": 88784, - "family": "Atlantic-Congo", + "language_name": "Oji-Cree", + "speakers": 15078, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7215,9 +7215,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Colognian", - "speakers": 240479, - "family": "Indo-European", + "language_name": "Khamti", + "speakers": 13527, + "family": "Tai-Kadai", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7227,11 +7227,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Kurdish", - "speakers": 6866757, + "language_name": "Upper Sorbian", + "speakers": 12826, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7239,9 +7239,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kumyk", - "speakers": 283444, - "family": "Turkic", + "language_name": "Chipewyan", + "speakers": 12816, + "family": "Athabaskan-Eyak-Tlingit", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7251,9 +7251,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Komi", - "speakers": 255100, - "family": "Uralic", + "language_name": "Innu-aimun", + "speakers": 12062, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7263,9 +7263,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Kerinci", - "speakers": 373836, - "family": "Austronesian", + "language_name": "Lower Silesian", + "speakers": 11868, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7275,8 +7275,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Parkari Koli", - "speakers": 373602, + "language_name": "Walser", + "speakers": 11377, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -7287,9 +7287,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Cornish", - "speakers": 1973, - "family": "Indo-European", + "language_name": "Choctaw", + "speakers": 10977, + "family": "Muskogean", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7299,9 +7299,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Jutish", - "speakers": 0, - "family": "Indo-European", + "language_name": "Tagbanwa", + "speakers": 10045, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7311,9 +7311,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Machame", - "speakers": 433291, - "family": "Atlantic-Congo", + "language_name": "Xavánte", + "speakers": 9951, + "family": "Nuclear-Macro-Je", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7323,9 +7323,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Arpitan", - "speakers": 63777, - "family": "Indo-European", + "language_name": "Tuvalu", + "speakers": 9868, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7335,8 +7335,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Kachi Koli", - "speakers": 256851, + "language_name": "Northern Frisian", + "speakers": 9619, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -7345,13 +7345,13 @@ "language_modeling_chrf": 0.0, "translation_bleu": 0.0, "translation_chrf": 0.0 - }, - { - "language_name": "Galician", - "speakers": 3515530, - "family": "Indo-European", + }, + { + "language_name": "Roviana", + "speakers": 9591, + "family": "Austronesian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7359,9 +7359,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gilaki", - "speakers": 3906472, - "family": "Indo-European", + "language_name": "Wallisian", + "speakers": 9512, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7371,11 +7371,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Guarani", - "speakers": 5827107, - "family": "Tupian", + "language_name": "Tavringer Romani", + "speakers": 9488, + "family": "Speech Register", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7383,11 +7383,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Goan Konkani", - "speakers": 4243488, - "family": "Indo-European", + "language_name": "Cree", + "speakers": 9047, + "family": "Algic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7395,9 +7395,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gondi", - "speakers": 3182616, - "family": "Dravidian", + "language_name": "Kwasio", + "speakers": 8878, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7407,9 +7407,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gorontalo", - "speakers": 1094807, - "family": "Austronesian", + "language_name": "Lakota", + "speakers": 8316, + "family": "Siouan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7419,9 +7419,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gronings", - "speakers": 622094, - "family": "Indo-European", + "language_name": "Kosraean", + "speakers": 7990, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7431,9 +7431,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Garo", - "speakers": 821563, - "family": "Sino-Tibetan", + "language_name": "Inupiaq", + "speakers": 7983, + "family": "Eskimo-Aleut", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7443,8 +7443,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Swiss German", - "speakers": 7956952, + "language_name": "Zoroastrian Dari", + "speakers": 7983, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -7455,11 +7455,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Gujarati", - "speakers": 61721799, - "family": "Indo-European", + "language_name": "Tasawaq", + "speakers": 7970, + "family": "Songhay", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7467,9 +7467,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Guajajára", - "speakers": 17784, - "family": "Tupian", + "language_name": "Buhid", + "speakers": 7970, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7479,9 +7479,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Wayuu", - "speakers": 132529, - "family": "Arawakan", + "language_name": "Mi'kmaw", + "speakers": 7916, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7491,9 +7491,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Frafra", - "speakers": 1026907, - "family": "Atlantic-Congo", + "language_name": "Hassaniyya", + "speakers": 7239, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7503,9 +7503,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gusii", - "speakers": 2622867, - "family": "Atlantic-Congo", + "language_name": "Lower Sorbian", + "speakers": 6974, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7515,9 +7515,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Manx", - "speakers": 1719, - "family": "Indo-European", + "language_name": "Nauru", + "speakers": 6930, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7527,9 +7527,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gurung", - "speakers": 87951, - "family": "Sino-Tibetan", + "language_name": "Eastern Lawa", + "speakers": 6898, + "family": "Austroasiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7539,9 +7539,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gwichʼin", - "speakers": 302, - "family": "Athabaskan-Eyak-Tlingit", + "language_name": "Yapese", + "speakers": 6556, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7551,9 +7551,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gujari", - "speakers": 467002, - "family": "Indo-European", + "language_name": "Atikamekw", + "speakers": 6408, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7563,9 +7563,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gilbertese", - "speakers": 67078, - "family": "Austronesian", + "language_name": "Piedmontese", + "speakers": 6178, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7575,9 +7575,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Hakka Chinese", - "speakers": 32062460, - "family": "Sino-Tibetan", + "language_name": "Kalo Finnish Romani", + "speakers": 5015, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7587,9 +7587,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Geez", - "speakers": 0, - "family": "Afro-Asiatic", + "language_name": "Siksiká", + "speakers": 4900, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7599,9 +7599,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Northern Frisian", - "speakers": 9619, - "family": "Indo-European", + "language_name": "East Futuna", + "speakers": 4756, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7611,9 +7611,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Eastern Frisian", - "speakers": 2004, - "family": "Indo-European", + "language_name": "Taroko", + "speakers": 4721, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7623,9 +7623,9 @@ "translation_chrf": 0.0 }, { - "language_name": "East Futuna", - "speakers": 4756, - "family": "Austronesian", + "language_name": "Plains Cree", + "speakers": 4146, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7635,8 +7635,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Central-Eastern Niger Fulfulde", - "speakers": 1594068, + "language_name": "Safaliba", + "speakers": 4108, "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, @@ -7647,11 +7647,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Friulian", - "speakers": 37442, - "family": "Indo-European", + "language_name": "Muscogee", + "speakers": 3992, + "family": "Muskogean", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7659,11 +7659,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Nigerian Fulfulde", - "speakers": 14339876, - "family": "Atlantic-Congo", + "language_name": "Veps", + "speakers": 3543, + "family": "Uralic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7671,9 +7671,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Fur", - "speakers": 1230163, - "family": "Furan", + "language_name": "Ménik", + "speakers": 3305, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7683,8 +7683,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Western Frisian", - "speakers": 743057, + "language_name": "Virgin Islands Creole English", + "speakers": 3113, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -7695,11 +7695,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Irish", - "speakers": 1237487, - "family": "Indo-European", + "language_name": "Turoyo", + "speakers": 3035, + "family": "Afro-Asiatic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7707,9 +7707,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ga", - "speakers": 821526, - "family": "Atlantic-Congo", + "language_name": "Ulithian", + "speakers": 2971, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7719,9 +7719,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gagauz", - "speakers": 111028, - "family": "Turkic", + "language_name": "Rotuman", + "speakers": 2527, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7731,9 +7731,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gan Chinese", - "speakers": 23698340, - "family": "Sino-Tibetan", + "language_name": "Warlpiri", + "speakers": 2496, + "family": "Pama-Nyungan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7743,9 +7743,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Gayo", - "speakers": 320431, - "family": "Austronesian", + "language_name": "Yangben", + "speakers": 2303, + "family": "Atlantic-Congo", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7755,9 +7755,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Garhwali", - "speakers": 3580443, - "family": "Indo-European", + "language_name": "Slave", + "speakers": 2299, + "family": "Athabaskan-Eyak-Tlingit", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7767,9 +7767,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Zoroastrian Dari", - "speakers": 7983, - "family": "Indo-European", + "language_name": "Dogrib", + "speakers": 2111, + "family": "Athabaskan-Eyak-Tlingit", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7779,8 +7779,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Guianese Creole French", - "speakers": 51872, + "language_name": "Eastern Frisian", + "speakers": 2004, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -7791,11 +7791,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Scottish Gaelic", - "speakers": 72337, + "language_name": "Cornish", + "speakers": 1973, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7803,11 +7803,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Hausa", - "speakers": 40411882, - "family": "Afro-Asiatic", + "language_name": "Swampy Cree", + "speakers": 1809, + "family": "Algic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7815,9 +7815,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Hawaiian", - "speakers": 29605, - "family": "Austronesian", + "language_name": "Mohawk", + "speakers": 1772, + "family": "Iroquoian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7827,9 +7827,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ngomba", - "speakers": 94333, - "family": "Atlantic-Congo", + "language_name": "Manx", + "speakers": 1719, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7839,9 +7839,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Interlingua", - "speakers": 136, - "family": "Artificial Language", + "language_name": "Lule Sami", + "speakers": 1530, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7851,9 +7851,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ibibio", - "speakers": 2996392, - "family": "Atlantic-Congo", + "language_name": "Naskapi", + "speakers": 1395, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7863,11 +7863,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Indonesian", - "speakers": 171207687, + "language_name": "Tokelau", + "speakers": 1285, "family": "Austronesian", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7875,9 +7875,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Interlingue", - "speakers": 1, - "family": "Artificial Language", + "language_name": "Niuean", + "speakers": 1120, + "family": "Austronesian", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7887,9 +7887,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ifè", - "speakers": 111910, - "family": "Atlantic-Congo", + "language_name": "Saterland Frisian", + "speakers": 962, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7899,11 +7899,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Igbo", - "speakers": 27823640, - "family": "Atlantic-Congo", + "language_name": "Seri", + "speakers": 901, + "family": null, "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7911,9 +7911,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Sichuan Yi", - "speakers": 8364120, - "family": "Sino-Tibetan", + "language_name": "Chilcotin", + "speakers": 867, + "family": "Athabaskan-Eyak-Tlingit", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7923,9 +7923,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Inupiaq", - "speakers": 7983, - "family": "Eskimo-Aleut", + "language_name": "Latin", + "speakers": 820, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7935,11 +7935,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Iloko", - "speakers": 10481376, - "family": "Austronesian", + "language_name": "Halkomelem", + "speakers": 716, + "family": "Salishan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7947,9 +7947,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ingush", - "speakers": 226755, - "family": "Nakh-Daghestanian", + "language_name": "Michif", + "speakers": 678, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7959,9 +7959,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ido", - "speakers": 0, - "family": "Artificial Language", + "language_name": "Skolt Sami", + "speakers": 613, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -7971,11 +7971,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Icelandic", - "speakers": 350734, - "family": "Indo-European", + "language_name": "Inari Sami", + "speakers": 613, + "family": "Uralic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7983,11 +7983,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Italian", - "speakers": 70247060, - "family": "Indo-European", + "language_name": "Lillooet", + "speakers": 528, + "family": "Salishan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -7995,9 +7995,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Inuktitut", - "speakers": 90466, - "family": "Eskimo-Aleut", + "language_name": "Okanagan", + "speakers": 490, + "family": "Salishan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8007,9 +8007,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ingrian", - "speakers": 142, - "family": "Uralic", + "language_name": "Maliseet-Passamaquoddy", + "speakers": 490, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8019,11 +8019,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Japanese", - "speakers": 119729026, - "family": "Japonic", + "language_name": "Kwakʼwala", + "speakers": 377, + "family": "Wakashan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -8031,9 +8031,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Jamaican Creole English", - "speakers": 2668142, - "family": "Indo-European", + "language_name": "Northern East Cree", + "speakers": 377, + "family": "Algic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8043,9 +8043,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Lojban", - "speakers": 0, - "family": "Artificial Language", + "language_name": "Gwichʼin", + "speakers": 302, + "family": "Athabaskan-Eyak-Tlingit", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8055,11 +8055,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Iban", - "speakers": 816302, - "family": "Bookkeeping", + "language_name": "Esperanto", + "speakers": 301, + "family": "Artificial Language", "average": 0.0, - "in_benchmark": false, + "in_benchmark": true, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -8067,9 +8067,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Herero", - "speakers": 239336, - "family": "Atlantic-Congo", + "language_name": "Southern Sami", + "speakers": 296, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8079,8 +8079,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Hazaragi", - "speakers": 2161984, + "language_name": "Tsakonian", + "speakers": 202, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -8091,11 +8091,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Armenian", - "speakers": 5317273, - "family": "Indo-European", + "language_name": "Ingrian", + "speakers": 142, + "family": "Uralic", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -8103,11 +8103,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Hebrew", - "speakers": 8675480, - "family": "Afro-Asiatic", + "language_name": "Interlingua", + "speakers": 136, + "family": "Artificial Language", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -8115,9 +8115,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Fiji Hindi", - "speakers": 383749, - "family": "Indo-European", + "language_name": "Araona", + "speakers": 105, + "family": "Pano-Tacanan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8127,9 +8127,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Hiligaynon", - "speakers": 9171204, - "family": "Austronesian", + "language_name": "Prussian", + "speakers": 38, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8139,9 +8139,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Southern Hindko", - "speakers": 957354, - "family": "Indo-European", + "language_name": "Interlingue", + "speakers": 1, + "family": "Artificial Language", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8151,11 +8151,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Chhattisgarhi", - "speakers": 14586990, - "family": "Indo-European", + "language_name": "Literary Chinese", + "speakers": 0, + "family": "Sino-Tibetan", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -8163,9 +8163,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Hmong Njua", - "speakers": 781687, - "family": "Hmong-Mien", + "language_name": "Ido", + "speakers": 0, + "family": "Artificial Language", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8175,9 +8175,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Hanunoo", - "speakers": 17469, - "family": "Austronesian", + "language_name": "Lojban", + "speakers": 0, + "family": "Artificial Language", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8187,8 +8187,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Northern Hindko", - "speakers": 3969517, + "language_name": "Jutish", + "speakers": 0, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -8199,9 +8199,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Hiri Motu", - "speakers": 152449, - "family": "Pidgin", + "language_name": "Votic", + "speakers": 0, + "family": "Uralic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8211,9 +8211,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Ho", - "speakers": 1312829, - "family": "Austroasiatic", + "language_name": "Geez", + "speakers": 0, + "family": "Afro-Asiatic", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8223,9 +8223,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Hadothi", - "speakers": 1087394, - "family": "Indo-European", + "language_name": "Osage", + "speakers": 0, + "family": "Siouan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8235,11 +8235,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Croatian", - "speakers": 6813164, + "language_name": "Romagnol", + "speakers": 0, "family": "Indo-European", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -8247,8 +8247,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Upper Sorbian", - "speakers": 12826, + "language_name": "Church Slavic", + "speakers": 0, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -8259,9 +8259,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Xiang Chinese", - "speakers": 40426580, - "family": "Sino-Tibetan", + "language_name": "Samogitian", + "speakers": 0, + "family": "Indo-European", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8271,11 +8271,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Haitian Creole", - "speakers": 8964918, - "family": "Indo-European", + "language_name": "Obolo", + "speakers": 0, + "family": "Atlantic-Congo", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -8283,11 +8283,11 @@ "translation_chrf": 0.0 }, { - "language_name": "Hungarian", - "speakers": 12443430, - "family": "Uralic", + "language_name": "Chickasaw", + "speakers": 0, + "family": "Muskogean", "average": 0.0, - "in_benchmark": true, + "in_benchmark": false, "NaN": 0.0, "classification_accuracy": 0.0, "language_modeling_chrf": 0.0, @@ -8295,9 +8295,9 @@ "translation_chrf": 0.0 }, { - "language_name": "Halkomelem", - "speakers": 716, - "family": "Salishan", + "language_name": "Caddo", + "speakers": 0, + "family": "Caddoan", "average": 0.0, "in_benchmark": false, "NaN": 0.0, @@ -8307,8 +8307,8 @@ "translation_chrf": 0.0 }, { - "language_name": "Zaza", - "speakers": 1148245, + "language_name": "Palatine German", + "speakers": 0, "family": "Indo-European", "average": 0.0, "in_benchmark": false, @@ -8323,8 +8323,9 @@ { "name": "FLORES+", "author": "Meta", + "author_url": "https://ai.meta.com", "url": "https://huggingface.co/datasets/openlanguagedata/flores_plus", - "n_languages": 200.0, + "n_languages": 200, "tasks": [ "translation", "classification", @@ -8337,8 +8338,9 @@ { "name": "FLEURS", "author": "Meta", + "author_url": "https://ai.meta.com", "url": "https://huggingface.co/datasets/google/fleurs", - "n_languages": 102.0, + "n_languages": 102, "tasks": [ "speech_recognition" ], @@ -8349,8 +8351,9 @@ { "name": "CommonVoice", "author": "Mozilla", + "author_url": "https://mozilla.ai", "url": "https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0", - "n_languages": 124.0, + "n_languages": 124, "tasks": [ "speech_recognition" ], @@ -8361,8 +8364,9 @@ { "name": "MMMLU", "author": "OpenAI", + "author_url": "https://openai.com", "url": "https://huggingface.co/datasets/openai/MMMLU", - "n_languages": 14.0, + "n_languages": 14, "tasks": [ "question_answering" ], @@ -8373,8 +8377,9 @@ { "name": "AfriMMLU", "author": "Masakhane", + "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/afrimmlu", - "n_languages": 17.0, + "n_languages": 17, "tasks": [ "question_answering" ], @@ -8384,9 +8389,10 @@ }, { "name": "Okapi MMLU", - "author": "Okapi", + "author": "Academic", + "author_url": null, "url": "https://huggingface.co/datasets/jon-tow/okapi_mmlu", - "n_languages": 16.0, + "n_languages": 16, "tasks": [ "question_answering" ], @@ -8397,8 +8403,9 @@ { "name": "Global MMLU", "author": "Cohere", + "author_url": "https://cohere.com", "url": "https://huggingface.co/datasets/CohereForAI/Global-MMLU", - "n_languages": 42.0, + "n_languages": 42, "tasks": [ "question_answering" ], @@ -8409,8 +8416,9 @@ { "name": "MGSM", "author": "Google", + "author_url": "https://google.com", "url": "https://huggingface.co/datasets/juletxara/mgsm", - "n_languages": 10.0, + "n_languages": 10, "tasks": [ "math" ], @@ -8421,8 +8429,9 @@ { "name": "AfriMGSM", "author": "Masakhane", + "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/afrimgsm", - "n_languages": 18.0, + "n_languages": 18, "tasks": [ "math" ], @@ -8432,9 +8441,10 @@ }, { "name": "Okapi ARC Challenge", - "author": "Okapi", + "author": "Academic", + "author_url": null, "url": "https://huggingface.co/datasets/jon-tow/okapi_arc_challenge", - "n_languages": 31.0, + "n_languages": 31, "tasks": [ "question_answering" ], @@ -8445,8 +8455,9 @@ { "name": "Uhuru ARC Easy", "author": "Masakhane", + "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/uhura-arc-easy", - "n_languages": 6.0, + "n_languages": 6, "tasks": [ "question_answering" ], @@ -8456,9 +8467,10 @@ }, { "name": "Okapi TruthfulQA", - "author": "Okapi", + "author": "Academic", + "author_url": null, "url": "https://huggingface.co/datasets/jon-tow/okapi_truthfulqa/tree/main/data", - "n_languages": 31.0, + "n_languages": 31, "tasks": [ "question_answering" ], @@ -8469,8 +8481,9 @@ { "name": "Uhura TruthfulQA", "author": "Masakhane", + "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/uhura-truthfulqa", - "n_languages": 6.0, + "n_languages": 6, "tasks": [ "question_answering" ], @@ -8481,8 +8494,9 @@ { "name": "XNLI", "author": "Meta", + "author_url": "https://ai.meta.com", "url": "https://huggingface.co/datasets/facebook/xnli", - "n_languages": 14.0, + "n_languages": 14, "tasks": [ "classification" ], @@ -8493,8 +8507,9 @@ { "name": "AfriXNLI", "author": "Masakhane", + "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/afrixnli", - "n_languages": 18.0, + "n_languages": 18, "tasks": [ "classification" ], @@ -8504,9 +8519,10 @@ }, { "name": "Okapi HellaSwag", - "author": "Okapi", + "author": "Academic", + "author_url": null, "url": "https://huggingface.co/datasets/jon-tow/okapi_hellaswag", - "n_languages": 31.0, + "n_languages": 31, "tasks": [ "question_answering" ], @@ -8517,8 +8533,9 @@ { "name": "WikiANN / PAN-X", "author": "Academic", + "author_url": null, "url": "https://huggingface.co/datasets/unimelb-nlp/wikiann", - "n_languages": 176.0, + "n_languages": 176, "tasks": [ "ner" ], @@ -8529,8 +8546,9 @@ { "name": "MSVAMP", "author": "Microsoft", + "author_url": "https://microsoft.com", "url": "https://huggingface.co/datasets/Mathoctopus/MSVAMP", - "n_languages": 10.0, + "n_languages": 10, "tasks": [ "math" ], @@ -8541,8 +8559,9 @@ { "name": "XLSUM", "author": "Academic", + "author_url": null, "url": "https://huggingface.co/datasets/csebuetnlp/xlsum", - "n_languages": 45.0, + "n_languages": 45, "tasks": [ "summarization" ], @@ -8553,8 +8572,9 @@ { "name": "SEA-IFEVAL", "author": "AI Singapore", + "author_url": "https://aisingapore.org", "url": "https://huggingface.co/datasets/aisingapore/instruction_following-ifeval", - "n_languages": 7.0, + "n_languages": 7, "tasks": [ "instruction_following" ], @@ -8565,8 +8585,9 @@ { "name": "XTREME", "author": "Google", + "author_url": "https://google.com", "url": "https://huggingface.co/datasets/google/xtreme", - "n_languages": 40.0, + "n_languages": 40, "tasks": [ "translation", "classification", @@ -8580,8 +8601,9 @@ { "name": "XGLUE", "author": "Microsoft", + "author_url": "https://microsoft.com", "url": "https://huggingface.co/datasets/microsoft/xglue", - "n_languages": 18.0, + "n_languages": 18, "tasks": [ "pos" ], @@ -8592,8 +8614,9 @@ { "name": "IndicGLUE", "author": "AI4Bharat", + "author_url": "https://models.ai4bharat.org", "url": "https://huggingface.co/datasets/ai4bharat/indic_glue", - "n_languages": 11.0, + "n_languages": 11, "tasks": [ "question_answering" ], @@ -8604,8 +8627,9 @@ { "name": "Opus Gnome", "author": "Helsinki NLP", + "author_url": null, "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_gnome", - "n_languages": 187.0, + "n_languages": 187, "tasks": [ "translation" ], @@ -8616,8 +8640,9 @@ { "name": "Opus Paracrawl", "author": "Helsinki NLP", + "author_url": null, "url": "https://huggingface.co/datasets/Helsinki-NLP/opus_paracrawl", - "n_languages": 43.0, + "n_languages": 43, "tasks": [ "translation" ], @@ -8628,8 +8653,9 @@ { "name": "CCAligned", "author": "Meta", + "author_url": "https://ai.meta.com", "url": "https://huggingface.co/datasets/ahelk/ccaligned_multilingual", - "n_languages": 137.0, + "n_languages": 137, "tasks": [ "translation" ], @@ -8640,8 +8666,9 @@ { "name": "OPUS Collection", "author": "Helsinki NLP", - "url": "https://opus.nlpl.eu/", - "n_languages": 747.0, + "author_url": null, + "url": "https://opus.nlpl.eu", + "n_languages": 747, "tasks": [ "translation" ], @@ -8652,8 +8679,9 @@ { "name": "MasakhaNER", "author": "Masakhane", + "author_url": "https://www.masakhane.io", "url": "https://huggingface.co/datasets/masakhane/masakhaner", - "n_languages": 10.0, + "n_languages": 10, "tasks": [ "ner" ], @@ -8663,9 +8691,10 @@ }, { "name": "Multilingual Sentiments", - "author": null, + "author": "Academic", + "author_url": null, "url": "https://huggingface.co/datasets/tyqiangz/multilingual-sentiments", - "n_languages": 12.0, + "n_languages": 12, "tasks": [ "sentiment_analysis" ], @@ -8676,8 +8705,9 @@ { "name": "CulturaX", "author": "Academic", + "author_url": null, "url": "https://huggingface.co/datasets/uonlp/CulturaX", - "n_languages": 167.0, + "n_languages": 167, "tasks": [ "language_modeling" ], @@ -8688,8 +8718,9 @@ { "name": "Tülu 3 SFT Mixture", "author": "AllenAI", + "author_url": "https://allenai.org", "url": "https://huggingface.co/datasets/allenai/tulu-3-sft-mixture", - "n_languages": 70.0, + "n_languages": 70, "tasks": [ "instruction_following" ], @@ -8700,8 +8731,9 @@ { "name": "xP3", "author": "BigScience", + "author_url": "https://bigscience.huggingface.co", "url": "https://huggingface.co/datasets/bigscience/xP3", - "n_languages": 46.0, + "n_languages": 46, "tasks": [ "instruction_following" ], @@ -8712,8 +8744,9 @@ { "name": "Aya", "author": "Cohere", + "author_url": "https://cohere.com", "url": "https://huggingface.co/datasets/CohereForAI/aya_dataset", - "n_languages": 65.0, + "n_languages": 65, "tasks": [ "instruction_following" ], @@ -8724,8 +8757,9 @@ { "name": "Lanfrica", "author": "Lanfrica", + "author_url": "https://lanfrica.com", "url": "https://lanfrica.com/records?language=yor&task=machine%20translation", - "n_languages": 2200.0, + "n_languages": 2200, "tasks": [ "datasets" ], @@ -8736,8 +8770,9 @@ { "name": "HuggingFace Languages", "author": "HuggingFace", + "author_url": "https://huggingface.co", "url": "https://huggingface.co/languages", - "n_languages": 4680.0, + "n_languages": 4680, "tasks": [ "datasets", "models" @@ -8749,8 +8784,9 @@ { "name": "HuggingFace Multilingual Datasets", "author": "HuggingFace", + "author_url": "https://huggingface.co", "url": "https://huggingface.co/datasets?other=multilinguality:multilingual", - "n_languages": null, + "n_languages": 2012, "tasks": [ "datasets" ],