from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # benchmark: task key in the results JSON; metric: metric key in the JSON; col_name: name displayed in the leaderboard
    task0 = Task("lid", "acc", "LID")
    task1 = Task("topic_classification", "acc", "TC")
    task2 = Task("rc_qa", "acc", "RC-QA")
    task3 = Task("nli", "acc", "NLI")
    task4 = Task("machine_translation_xx_eng", "chrf", "MT (xx-en)")
    task5 = Task("machine_translation_eng_xx", "chrf", "MT (en-xx)")

class SpeechTasks(Enum):
    # benchmark: task key in the results JSON; metric: metric key in the JSON; col_name: name displayed in the leaderboard
    task0 = Task("lid", "acc", "LID")
    task1 = Task("topic_classification", "acc", "TC")
    task2 = Task("rc_qa", "acc", "RC-QA")
    task3 = Task("asr", "cer", "ASR")
    task4 = Task("s2tt", "chrf", "S2TT")

NUM_FEWSHOT = 0  # Change to match your few-shot setting
# ---------------------------------------------------
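
# A minimal sketch (added for illustration; not part of the original template)
# of how the Task fields above are typically consumed when reading a results
# JSON. The {"results": {"<task_key>": {"<metric_key>": <score>}}} layout is
# an assumption about the harness output, not a documented mSTEB schema.
def _example_scores(results: dict) -> dict:
    """Map each task's display column name to its score, if present."""
    return {
        task.value.col_name: results.get("results", {})
        .get(task.value.benchmark, {})
        .get(task.value.metric)
        for task in Tasks
    }
# e.g. _example_scores({"results": {"lid": {"acc": 0.91}}})
#      -> {"LID": 0.91, "TC": None, ..., "MT (en-xx)": None}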



# Your leaderboard name
TITLE = """<h1 align="center" id="space-title">mSTEB Leaderboard</h1>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
This leaderboard presents the results of evaluating models on the mSTEB benchmark for speech and text tasks.
"""

# Which evaluations are you running? how can people reproduce what you have?
LLM_BENCHMARKS_TEXT = """
## Reproducibility
To reproduce our results, please see the mSTEB GitHub repository:

https://github.com/McGill-NLP/mSTEB

"""

EVALUATION_QUEUE_TEXT = """
## Submit your results
Please provide the model name, upload your CSV file, and select the appropriate result type to submit your evaluation results for mSTEB.

Format the results in the same way as the sample CSV files below.
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""
@misc{beyene2025mstebmassivelymultilingualevaluation,
  title  = {mSTEB: Massively Multilingual Evaluation of LLMs on Speech and Text Tasks}, 
  author = {Luel Hagos Beyene and Vivek Verma and Min Ma and Jesujoba O. Alabi 
            and Fabian David Schmidt and Joyce Nakatumba-Nabende and 
            David Ifeoluwa Adelani},
  year   = {2025},
  eprint = {2506.08400},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url = {https://arxiv.org/abs/2506.08400}
}
"""