File size: 4,311 Bytes
f2a3e70
bd5b131
 
87ad165
bd5b131
87ad165
 
bd5b131
87ad165
 
 
bd5b131
87ad165
bd5b131
7a9f32a
87ad165
 
 
 
 
 
fd35185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd5b131
fd35185
 
 
 
bd5b131
fd35185
 
b314a79
 
 
fd35185
b314a79
fd35185
b314a79
 
 
 
 
fd35185
de1d88f
07dade5
b314a79
7a9f32a
de1d88f
7a9f32a
 
 
 
 
 
 
 
f2a3e70
fd35185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd5b131
 
fd35185
 
 
bd5b131
 
fd35185
bd5b131
 
 
 
87ad165
7a9f32a
 
 
 
bd5b131
87ad165
e19029d
07dade5
bd5b131
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import json
import re

import gradio as gr
import numpy
import pandas as pd

from src.display.css_html_js import custom_css
from src.about import (
    INTRODUCTION_TEXT,
    TITLE,
    AUTHORS,
)
from src.display.formatting import make_clickable_model
from plot_results import create_performance_plot

# ---------------------------------------------------------------------------
# Leaderboard data preparation
# ---------------------------------------------------------------------------

# Load the benchmark report. JSON is UTF-8 by specification, so decode it
# explicitly instead of relying on the platform's default encoding.
with open("benchmark_report.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)

# Build the dataframe and rename the report's columns to the names used below.
leaderboard_df = pd.DataFrame(json_data).rename(
    columns={
        "Model Name": "Model Path",
        "Model Size": "Params",
    }
)

# Overall benchmark score: mean of the object-level and country-level averages.
leaderboard_df["Avg"] = (
    leaderboard_df["Avg (object)"] + leaderboard_df["Avg (country)"]
) / 2

# Score columns, in display order. They share the same colour gradient and
# the same two-decimal number format.
score_columns = [
    "Avg",
    "Avg (object)", "Avg (country)",
    "History (object)", "History (country)",
    "Geography (object)", "Geography (country)",
    "Art & Entertainment (object)", "Art & Entertainment (country)",
    "Culture & Tradition (object)", "Culture & Tradition (country)",
]
display_columns = ["Model Path", "Params"] + score_columns

# Copy so the column assignments below modify an independent frame rather
# than a slice of the original one (avoids SettingWithCopyWarning).
leaderboard_df = leaderboard_df[display_columns].copy()

# "-" marks an unknown parameter count: blank it out (NaN) and convert the
# remaining values to a real float column. Any other non-numeric value still
# raises, as the previous astype(float) conversion did.
params = leaderboard_df["Params"]
leaderboard_df["Params"] = pd.to_numeric(params.mask(params == "-"))

# Best overall score first.
leaderboard_df = leaderboard_df.sort_values(by="Avg", ascending=False)

# Print model names and scores to the console before the model column is
# replaced by its clickable form.
print("\n===== MODEL RESULTS =====")
print("Avg is calculated as: (Avg (object) + Avg (country)) / 2")
for model_path, avg_score in zip(leaderboard_df["Model Path"], leaderboard_df["Avg"]):
    print(f"{model_path}: {avg_score:.2f}")
print("========================\n")

# Replace each model path with its clickable representation and give the
# column its display name.
leaderboard_df["Model Path"] = leaderboard_df["Model Path"].apply(make_clickable_model)
leaderboard_df = leaderboard_df.rename(columns={"Model Path": "Model"})

# NOTE: output.csv is not served by the UI below; it is only written to disk
# (e.g. for a gr.File download component, should one be added).
leaderboard_df.to_csv("output.csv")

# Number formatting: whole numbers for Params, two decimals for every score.
rounding = {"Params": "{:.0f}"}
rounding.update({column: "{:.2f}" for column in score_columns})

# Colour coding:
#   * score columns use RdYlGn;
#   * Params uses the reversed map (RdYlGn_r) with the colour scale capped
#     at vmax=150.
# The score columns are named explicitly so the first gradient never touches
# Params, whatever its dtype.
leaderboard_df_styled = (
    leaderboard_df.style
    .background_gradient(cmap="RdYlGn", subset=score_columns)
    .background_gradient(cmap="RdYlGn_r", subset=["Params"], vmax=150)
    .format(rounding)
)

# Exactly one datatype per displayed column: the model column is rendered as
# markdown, every remaining column is numeric.
datatypes = ["markdown"] + ["number"] * (len(leaderboard_df.columns) - 1)

# ---------------------------------------------------------------------------
# User interface
# ---------------------------------------------------------------------------

demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    leaderboard_table = gr.components.Dataframe(
        value=leaderboard_df_styled,
        datatype=datatypes,
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )

    # Create and show the performance plot below the table.
    fig = create_performance_plot()
    plot = gr.Plot(value=fig, elem_id="performance-plot")

    gr.Markdown(AUTHORS, elem_classes="markdown-text")

# Start serving once the layout is complete, i.e. after the Blocks context
# has been closed.
demo.queue(default_concurrency_limit=40).launch()