Spaces:
Running
Running
import gradio as gr | |
from src.display.formatting import render_leaderboard_html, get_display_model_name | |
from src.data_utils import get_length_category_list, get_length_category_df | |
import pandas as pd | |
import numpy as np | |
def render_length_category_html(df, med_len_map=None):
    """
    Render the length-category leaderboard as an HTML table.

    Rows are ordered by "Overall" in descending order. The Model Name cell is
    colored gold/silver/bronze for ranks 1/2/3 (the rank itself is never shown
    as a column) and carries the Type, Model Type and Think badges, an optional
    tooltip taken from "Comment" and an optional hyperlink taken from "Link".

    Column order: Model Name, Overall, Med. Len., Med. Resp. Len., the first
    length category present in the data, then every remaining column except
    Rank_Internal, Comment, Group and Link.

    Rows whose "Overall" is an empty string are placed at the top and rows whose
    "Overall" is missing or non-numeric at the bottom; neither kind gets a medal
    color.

    Args:
        df: DataFrame to render. Must contain an "Overall" column; "Model Name"
            is required when ``med_len_map`` is given.
        med_len_map: Optional mapping of model name to median length. When
            provided, a "Med. Len." column is added (or overwritten) from it.

    Returns:
        The table wrapped in a scrollable container, or a short placeholder
        ``<div>`` when ``df`` is None or empty.
    """
    if df is None or df.empty:
        return "<div>No data available.</div>"

    # Imported lazily (after the cheap guard above), as the original code did
    # for its project-local helpers.
    from html import escape

    from constants import NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY
    from src.data_utils import get_length_category_list
    from src.display.formatting import (
        get_model_type_badge,
        get_score_stars,
        get_think_badge,
        get_type_badge,
    )

    df = df.copy()
    overall = df["Overall"]
    # Coerce once so that blanks / junk cannot break ranking or sorting.
    score = pd.to_numeric(overall, errors="coerce")
    # Rank 0 means "unranked": NaN ranks would otherwise make astype(int) raise.
    df["Rank_Internal"] = score.rank(method="min", ascending=False).fillna(0).astype(int)
    # Single stable descending sort; empty strings become +inf so they surface
    # first, while NaN keeps pandas' default position at the end.
    df = (
        df.assign(_overall_sort_key=score.mask(overall.eq(""), np.inf))
        .sort_values("_overall_sort_key", ascending=False, kind="mergesort")
        .drop(columns=["_overall_sort_key"])
        .reset_index(drop=True)
    )

    # Badge rendering below reads these columns unconditionally.
    for badge_col in ("Type", "Model Type", "Think"):
        if badge_col not in df.columns:
            df[badge_col] = "unknown"

    if med_len_map is not None:
        df["Med. Len."] = df["Model Name"].map(med_len_map)

    hidden_cols = {"Rank_Internal", "Comment", "Group", "Link"}
    base_cols = [col for col in df.columns if col not in hidden_cols]
    # Only the first matching length category is promoted to the front.
    category_col = next(
        (col for col in get_length_category_list() if col in base_cols), None
    )

    display_cols = [
        col
        for col in ("Model Name", "Overall", "Med. Len.", "Med. Resp. Len.")
        if col in base_cols
    ]
    if category_col and category_col not in display_cols:
        display_cols.append(category_col)
    for col in base_cols:
        if col not in display_cols:
            display_cols.append(col)

    header_tooltips = {
        "Model Name": "Hovering the mouse displays additional details, and clicking the model name navigates to the corresponding page.",
        "Med. Len.": "Median token length of think and response for the model.",
        "Med. Resp. Len.": "Median token length of the model's responses (excluding think).",
    }
    rank_styles = {
        1: "color: #ffd700; font-weight: bold; text-shadow: 0 0 4px #fff2;",
        2: "color: #b0b0b0; font-weight: bold;",
        3: "color: #cd7f32; font-weight: bold;",
    }
    default_style = "color: #fff; font-weight: 600;"
    # Characters that are replaced by "_" when deriving the star widget id.
    id_table = str.maketrans(" -()", "____")

    def format_leaderboard_cell(cell, col):
        """Format a plain data cell: integers rounded, scores with two decimals."""
        # NaN / blank values are passed through untouched.
        if pd.isna(cell) or (isinstance(cell, str) and cell.strip() == ""):
            return cell
        try:
            if col in NUMERIC_INT_COLS_CATEGORY:
                return str(int(round(float(cell))))
            if col in NUMERIC_COLS_CATEGORY:
                return "{:.2f}".format(float(cell))
        except (TypeError, ValueError, OverflowError):
            # Not convertible to a number: fall back to the raw text below.
            pass
        return str(cell)

    def optional_text(row, column):
        """Return the stripped text of row[column], or "" when absent/NaN/blank."""
        value = row.get(column)
        return str(value).strip() if pd.notna(value) else ""

    # Collect fragments and join once instead of repeated string concatenation.
    parts = ['<table class="pretty-leaderboard-table">\n<thead><tr>']
    for col in display_cols:
        tooltip = header_tooltips.get(col)
        if tooltip is None:
            parts.append(f'<th>{col}</th>')
        else:
            parts.append(
                f'<th>{col}<span class="info-icon" title="{tooltip}">ⓘ</span></th>'
            )
    parts.append('</tr></thead>\n<tbody>\n')

    for _, row in df.iterrows():
        parts.append('<tr>')
        for col in display_cols:
            cell = row[col]
            if col == "Model Name":
                style = rank_styles.get(int(row["Rank_Internal"]), default_style)
                badge_html = (
                    get_type_badge(row["Type"])
                    + get_model_type_badge(row["Model Type"])
                    + get_think_badge(row["Think"])
                )
                display_name = get_display_model_name(str(cell))

                # Comment/Link come from data files; escape them so a stray
                # quote cannot terminate the attribute and corrupt the markup.
                comment_text = optional_text(row, "Comment")
                title_attribute = (
                    f' title="{escape(comment_text, quote=True)}"' if comment_text else ""
                )
                link_text = optional_text(row, "Link")
                if link_text:
                    clickable_name = (
                        f'<a href="{escape(link_text, quote=True)}" target="_blank" '
                        f'style="color:inherit;">{display_name}</a>'
                    )
                else:
                    clickable_name = display_name
                parts.append(
                    f'<td><span style="{style}"{title_attribute}>{clickable_name}</span>{badge_html}</td>'
                )
            elif col == "Overall":
                try:
                    unique_id = row.get("Model Name", None).translate(id_table)
                    cell_html = get_score_stars(float(cell), unique_id=unique_id)
                except Exception:
                    # Deliberate best effort: any failure (non-numeric score,
                    # missing model name, renderer error) shows the raw value.
                    cell_html = str(cell)
                parts.append(f'<td>{cell_html}</td>')
            else:
                parts.append(f'<td>{format_leaderboard_cell(cell, col)}</td>')
        parts.append('</tr>\n')
    parts.append('</tbody></table>')

    table_html = "".join(parts)
    # Scrollable wrapper so the table header can stay sticky.
    return f'<div class="leaderboard-table-container" style="max-height:900px;overflow-y:auto;">{table_html}</div>'
def render_length_category_table(leaderboard_df=None):
    """
    Build a category dropdown plus an HTML table of length stats for that category.

    When ``leaderboard_df`` is given, its "Overall" and per-category scores (and,
    if present, its "Type", "Model Type" and "Think" columns) are mapped onto the
    length table by "Model Name"; rows lacking either score are dropped. The table
    is re-rendered whenever the dropdown value changes.

    Returns a dict with the created "category_selector" and "table_html" components.
    """
    import gradio as gr

    categories = get_length_category_list()
    default_category = categories[0] if categories else ""

    def get_merged_df(selected_category):
        """Return the length table for one category, enriched from leaderboard_df."""
        if not selected_category:
            return None
        length_df = get_length_category_df(selected_category)
        if leaderboard_df is None or length_df is None:
            return length_df

        def lookup(column):
            # Model name -> value of `column` in the main leaderboard.
            return dict(zip(leaderboard_df["Model Name"], leaderboard_df[column]))

        overall_by_model = lookup("Overall")
        category_by_model = lookup(selected_category)
        merged = length_df.copy()
        merged["Overall"] = merged["Model Name"].map(overall_by_model)
        merged[selected_category] = merged["Model Name"].map(category_by_model)

        # Badge columns are copied over only when the leaderboard provides them.
        for badge_col in ("Type", "Model Type", "Think"):
            if badge_col in leaderboard_df.columns:
                merged[badge_col] = merged["Model Name"].map(lookup(badge_col))

        # Keep only models that have both an Overall and a category score.
        has_overall = merged["Overall"].notna()
        has_category = merged[selected_category].notna()
        return merged[has_overall & has_category]

    initial_df = get_merged_df(default_category)

    # Median lengths are taken from the leaderboard when it carries them.
    med_len_map = None
    if leaderboard_df is not None and "Med. Len." in leaderboard_df.columns:
        med_len_map = dict(
            zip(leaderboard_df["Model Name"], leaderboard_df["Med. Len."])
        )

    if initial_df is not None:
        initial_html = render_length_category_html(initial_df, med_len_map=med_len_map)
    else:
        initial_html = "<div>No data available.</div>"

    with gr.Column():
        category_selector = gr.Dropdown(
            choices=categories,
            value=default_category,
            label="Select Category for Length Table",
            interactive=True,
        )
        table_html = gr.HTML(value=initial_html, elem_id="length-category-table")

        def update_table(selected_category):
            """Re-render the table for the newly selected category."""
            return render_length_category_html(
                get_merged_df(selected_category), med_len_map=med_len_map
            )

        category_selector.change(
            fn=update_table,
            inputs=[category_selector],
            outputs=[table_html],
        )

    return {
        "category_selector": category_selector,
        "table_html": table_html,
    }
def _sort_desc_blank_first(frame, column):
    """Return ``frame`` stably sorted by ``column`` descending, empty strings first."""
    values = frame[column]
    # Empty strings become +inf (top); other non-numeric values become NaN (bottom).
    sort_key = pd.to_numeric(values, errors="coerce").mask(values.eq(""), np.inf)
    return (
        frame.assign(sort_col_tmp=sort_key)
        .sort_values("sort_col_tmp", ascending=False, kind="mergesort")
        .drop(columns=["sort_col_tmp"])
        .reset_index(drop=True)
    )


def _with_rank_after_model_name(frame):
    """Return ``frame`` with a 1-based "Rank" column placed right after "Model Name"."""
    frame = frame.assign(Rank=frame.index + 1)
    cols = frame.columns.tolist()
    if "Model Name" in cols:
        cols.remove("Rank")
        cols.insert(cols.index("Model Name") + 1, "Rank")
        frame = frame[cols]
    return frame


def create_leaderboard_tab(df, key):
    """
    Build the leaderboard tab: filter checkboxes, sort dropdown, table and CSV download.

    Args:
        df: DataFrame to display. Must contain an "Overall" column.
        key: "Category" or "Language"; selects which columns are offered for
            sorting and is forwarded to ``render_leaderboard_html``.

    Returns:
        A dict with the created components ("type_selector",
        "model_type_selector", "think_selector", "leaderboard_html_comp",
        "sort_col_dropdown", "df_state") and the "unified_filter" callback, or
        None when ``df`` has no "Overall" column.
    """
    if "Overall" not in df.columns:
        return None

    # The unmodified frame backs both the filter callback and the CSV download.
    df_state = gr.State(df)

    # Display copy for the initial table.
    df_badge = df.copy()
    # Scores stored as fractions (0~1) are shown as percentages (0~100).
    if pd.to_numeric(df_badge["Overall"], errors="coerce").max() <= 1.0:
        df_badge["Overall"] = df_badge["Overall"] * 100
    # "Group" is never displayed.
    df_badge = df_badge.drop(columns=["Group"], errors="ignore")
    df_badge = _with_rank_after_model_name(_sort_desc_blank_first(df_badge, "Overall"))

    # NOTE(review): block nesting was not recoverable from the source this was
    # restored from; the "Sort by" dropdown is assumed to share the row with
    # the three filters - confirm against the deployed layout.
    with gr.Row():
        type_choices = ["Open", "Proprietary"]
        type_selector = gr.CheckboxGroup(
            choices=type_choices,
            value=type_choices,
            label="Select Type (Open/Proprietary)",
        )
        model_type_choices = ["Instruct", "Think", "Hybrid"]
        model_type_selector = gr.CheckboxGroup(
            choices=model_type_choices,
            value=model_type_choices,
            label="Select Model Type (Instruct/Think/Hybrid)",
        )
        think_choices = ["On", "Off"]
        think_selector = gr.CheckboxGroup(
            choices=think_choices,
            value=think_choices,
            label="Select Think Mode (On/Off)",
        )

        # Sort candidates (sorting is always descending). Only columns that
        # actually exist are offered so a choice can never hit a missing column.
        if key == "Language":
            import re

            # Two-letter upper-case column names are treated as language codes.
            language_columns = [
                col for col in df_badge.columns if re.fullmatch(r"[A-Z]{2}", col)
            ]
            candidate_columns = [
                "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)",
            ]
            available_sort_columns = [
                col for col in candidate_columns if col in df_badge.columns
            ] + language_columns
        else:
            category_columns = [
                "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)",
                "Content Generation", "Editing", "Data Analysis", "Reasoning",
                "Hallucination", "Safety", "Repetition", "Summarization",
                "Translation", "Multi-Turn",
            ]
            available_sort_columns = [
                col for col in category_columns if col in df_badge.columns
            ]
        sort_col_dropdown = gr.Dropdown(
            choices=available_sort_columns,
            value="Overall",
            label="Sort by",
            interactive=True,
        )

    leaderboard_html = render_leaderboard_html(
        df_badge.round(3), overall_col="Overall", key=key
    )
    leaderboard_html_comp = gr.HTML(value=leaderboard_html, elem_id="leaderboard-table")

    def unified_filter(types, model_types, thinks, df, sort_col):
        """
        Filter ``df`` by the three checkbox selections, sort it and render it.

        An empty selection for a filter keeps every row. Matching is
        case-insensitive and ignores surrounding whitespace.

        Returns:
            (table HTML, effective sort column, names of the top-5 models) -
            always three values, matching the three wired outputs.
        """
        # NOTE(review): this re-renders from the raw state frame, so the 0~1 ->
        # 0~100 scaling and round(3) applied to the initial table are not
        # repeated here - confirm whether that is intended.
        filtered = df.copy()
        selections = (("Type", types), ("Model Type", model_types), ("Think", thinks))
        for column, selected in selections:
            if column not in filtered.columns:
                continue
            # Normalise before matching so NaN cells cannot break `.lower()`.
            filtered[column] = filtered[column].fillna("").astype(str)
            if not selected:
                continue  # nothing ticked: do not restrict on this column
            wanted = [str(value).lower().strip() for value in selected]
            normalized = filtered[column].str.lower().str.strip()
            filtered = filtered[normalized.isin(wanted)]

        if "Overall" not in filtered.columns:
            html = "<div style='color:red'>No 'Overall' column found in data. Please check your input data.</div>"
            return html, sort_col, []

        # A cleared dropdown or a stale/unknown column falls back to "Overall".
        if not isinstance(sort_col, str) or sort_col not in filtered.columns:
            sort_col = "Overall"

        filtered = _with_rank_after_model_name(_sort_desc_blank_first(filtered, sort_col))
        filtered = filtered.drop(columns=["Group"], errors="ignore")
        # Tag the frame with the active sort column; presumably read by
        # render_leaderboard_html - verify there before removing.
        filtered._sort_col = sort_col

        # The frame is already in display order, so the head is the top 5.
        top5_models = []
        if "Model Name" in filtered.columns:
            top5_models = filtered["Model Name"].tolist()[:5]
        return (
            render_leaderboard_html(filtered, overall_col="Overall", key=key),
            sort_col,
            top5_models,
        )

    def dataframe_to_csv(data):
        """Write ``data`` to ``truebench_<key>.csv`` with display model names; return the path."""
        # Work on a copy so the frame held in state is never modified.
        frame = data.copy() if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
        if "Model Name" in frame.columns:
            frame["Model Name"] = frame["Model Name"].apply(get_display_model_name)
        csv_path = f"truebench_{key}.csv"
        frame.to_csv(csv_path, index=False)
        return csv_path

    with gr.Row():
        with gr.Column(scale=1):
            pass  # Empty spacer that pushes the button to the right
        with gr.Column(scale=0):
            gr.DownloadButton(
                label="📥 Download to CSV",
                value=dataframe_to_csv,
                inputs=[df_state],
                visible=True,
                elem_classes=["custom-download-btn"],
            )

    # Styling for the download button.
    custom_css = """
<style>
.custom-download-btn >>> a {
background: #e3e6f3 !important;
color: #222 !important;
border: 1px solid rgba(0, 0, 0, 0.1) !important;
border-radius: 6px !important;
padding: 1px 1px !important;
font-size: 13px !important;
font-weight: bold !important;
text-shadow: 0 1px 1px rgba(0,0,0,0.1) !important;
margin: 0 3px 3px 0 !important;
}
.custom-download-btn:hover {
background: #f5f6fa !important;
box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1) !important;
}
</style>
"""
    gr.HTML(custom_css)

    # Every control re-runs the same filter; the third output (top-5 model
    # names) goes to a fresh State holder per listener, as before.
    filter_inputs = [
        type_selector, model_type_selector, think_selector, df_state, sort_col_dropdown,
    ]
    for control in (sort_col_dropdown, type_selector, model_type_selector, think_selector):
        control.change(
            fn=unified_filter,
            inputs=filter_inputs,
            outputs=[leaderboard_html_comp, sort_col_dropdown, gr.State()],
        )

    return {
        "type_selector": type_selector,
        "model_type_selector": model_type_selector,
        "think_selector": think_selector,
        "leaderboard_html_comp": leaderboard_html_comp,
        "sort_col_dropdown": sort_col_dropdown,
        "df_state": df_state,
        "unified_filter": unified_filter,  # Exposed for direct external call
    }