File size: 943 Bytes
a9abac4
c8be55c
a9abac4
8fcbcdb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8efcf6
1c17c69
99931b9
1c17c69
a9abac4
c8be55c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Line separator as a named constant; INTRODUCTION_TEXT below joins its
# bullet list with it from inside an f-string replacement field.
NEWLINE = "\n"

# Centered HTML heading displayed as the page title.
TITLE = '<h1 align="center" id="space-title">Roblox LLM Leaderboard</h1>'

# One record per benchmark. Each record has three string fields:
#   name        - label used as the link text
#   link        - URL of the benchmark dataset
#   description - one-line summary shown after the link
TASK_DESCRIPTIONS = [
    dict(
        name="RobloxQA",
        link="https://huggingface.co/datasets/boatbomber/RobloxQA-v1.0",
        description="Multiple choice question answering about Roblox APIs and concepts.",
    ),
    dict(
        name="RobloxQA_OpenEnded",
        link="https://huggingface.co/datasets/boatbomber/RobloxQA-OpenEnded-v1.0",
        description=(
            "Question answering about Roblox APIs and concepts without multiple choices. "
            + "Response correctness judged by an ensemble of reasoning LLMs by comparing the generated answer to the correct answer."
        ),
    ),
]

# Markdown introduction: a one-line summary followed by a "Benchmarks"
# section with one bullet per TASK_DESCRIPTIONS entry, each rendered as
# "- [name](link): description". The text starts and ends with a newline.
INTRODUCTION_TEXT = """
Tracking LLM capabilities regarding Roblox game development.

### Benchmarks:

{benchmark_list}
""".format(
    benchmark_list=NEWLINE.join(
        "- [{name}]({link}): {description}".format(**entry)
        for entry in TASK_DESCRIPTIONS
    )
)