import gradio as gr
from src.display.formatting import render_leaderboard_html, get_display_model_name
from src.data_utils import get_length_category_list, get_length_category_df
import pandas as pd
import numpy as np
def render_length_category_html(df, med_len_map=None):
    """
    Render the length category table as an HTML string.

    Rows are ordered by "Overall" in descending order. The Model Name cell is
    colored gold/silver/bronze for ranks 1/2/3 and carries the Type,
    Model Type and Think badges; the internal rank itself is never shown as a
    column. Columns are emitted in this order: Model Name, Overall, Med. Len.,
    Med. Resp. Len., the first matching length-category column, then every
    remaining column except Comment, Group and Link.

    Args:
        df: DataFrame containing at least an "Overall" column. "Model Name"
            is required when ``med_len_map`` is given.
        med_len_map: Optional mapping keyed by "Model Name" values. When
            provided, its values populate a "Med. Len." column.

    Returns:
        The table markup as a string, or a short "No data available."
        placeholder when ``df`` is None or empty.
    """
    if df is None or df.empty:
        return "<div>No data available.</div>"

    # Function-scope imports: resolved once per call rather than once per row.
    from html import escape

    from constants import NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY
    from src.display.formatting import (
        get_model_type_badge,
        get_score_stars,
        get_think_badge,
        get_type_badge,
    )

    df = df.copy()

    # Order by numeric Overall, descending. Blank strings are pushed to the top
    # via +inf; NaN and other non-numeric values sort last. Comparing the raw
    # column would fail as soon as strings and floats are mixed.
    overall = df["Overall"]
    is_blank = overall.map(lambda v: isinstance(v, str) and v.strip() == "").astype(bool)
    df["_sort_key"] = pd.to_numeric(overall, errors="coerce").mask(is_blank, np.inf)
    df = (
        df.sort_values("_sort_key", ascending=False, kind="mergesort")
        .drop(columns=["_sort_key"])
        .reset_index(drop=True)
    )

    # Rank only rows that have a numeric score; everything else gets rank 0,
    # which maps to the default (non-medal) style below.
    df["Rank_Internal"] = (
        pd.to_numeric(df["Overall"], errors="coerce")
        .rank(method="min", ascending=False)
        .fillna(0)
        .astype(int)
    )

    # The badge helpers need these columns; default them when absent.
    for badge_col in ("Type", "Model Type", "Think"):
        if badge_col not in df.columns:
            df[badge_col] = "unknown"

    if med_len_map is not None:
        df["Med. Len."] = df["Model Name"].map(med_len_map)

    # Only these columns are hidden. Type, Model Type and Think are still
    # listed among the remaining columns in addition to being shown as badges.
    hidden_cols = {"Rank_Internal", "Comment", "Group", "Link"}
    base_cols = [col for col in df.columns if col not in hidden_cols]

    # The dynamic category column is the first known length category present.
    category_col = next(
        (col for col in get_length_category_list() if col in base_cols), None
    )

    leading_cols = ["Model Name", "Overall", "Med. Len.", "Med. Resp. Len."]
    display_cols = [col for col in leading_cols if col in base_cols]
    if category_col and category_col not in display_cols:
        display_cols.append(category_col)
    display_cols.extend(col for col in base_cols if col not in display_cols)

    # NOTE(review): the markup below uses plain table elements without class
    # names or tooltip text; confirm against the stylesheet the app ships.
    info_icon_cols = {"Model Name", "Med. Len.", "Med. Resp. Len."}
    parts = ["<table>\n<thead><tr>"]
    for col in display_cols:
        if col in info_icon_cols:
            parts.append(f'<th>{col}<span class="info-icon">ⓘ</span></th>')
        else:
            parts.append(f"<th>{col}</th>")
    parts.append("</tr></thead>\n<tbody>\n")

    def format_leaderboard_cell(cell, col):
        """Format one non-special cell according to the numeric column lists."""
        # NaN and blank strings are passed through untouched.
        if pd.isna(cell) or (isinstance(cell, str) and cell.strip() == ""):
            return cell
        try:
            if col in NUMERIC_INT_COLS_CATEGORY:
                return str(int(round(float(cell))))
            if col in NUMERIC_COLS_CATEGORY:
                return "{:.2f}".format(float(cell))
        except (TypeError, ValueError, OverflowError):
            pass
        return str(cell)

    def optional_text(row, column):
        """Return the stripped text of row[column], or "" if missing/NaN."""
        if column not in row.index:
            return ""
        value = row[column]
        if pd.isna(value):
            return ""
        return str(value).strip()

    rank_styles = {
        1: "color: #ffd700; font-weight: bold; text-shadow: 0 0 4px #fff2;",
        2: "color: #b0b0b0; font-weight: bold;",
        3: "color: #cd7f32; font-weight: bold;",
    }
    default_style = "color: #fff; font-weight: 600;"
    # Characters replaced by "_" when deriving an id from the model name.
    id_table = str.maketrans({" ": "_", "-": "_", "(": "_", ")": "_"})

    for _, row in df.iterrows():
        parts.append("<tr>")
        for col in display_cols:
            cell = row[col]
            if col == "Model Name":
                style = rank_styles.get(row["Rank_Internal"], default_style)
                badge_html = (
                    get_type_badge(row["Type"])
                    + get_model_type_badge(row["Model Type"])
                    + get_think_badge(row["Think"])
                )
                # NOTE(review): the display name is inserted as returned by
                # get_display_model_name; whether it may contain markup is not
                # visible here, so it is deliberately not escaped.
                display_name = get_display_model_name(str(cell))

                # Comment becomes the tooltip; escape it so quotes in the
                # comment cannot terminate the attribute.
                comment_value = optional_text(row, "Comment")
                title_attribute = (
                    f' title="{escape(comment_value, quote=True)}"'
                    if comment_value
                    else ""
                )

                link_value = optional_text(row, "Link")
                if link_value:
                    clickable_name = (
                        f'<a href="{escape(link_value, quote=True)}" target="_blank" '
                        f'rel="noopener noreferrer" style="color: inherit;">'
                        f"{display_name}</a>"
                    )
                else:
                    clickable_name = display_name
                parts.append(
                    f'<td style="{style}"{title_attribute}>'
                    f"{clickable_name}{badge_html}</td>"
                )
            elif col == "Overall":
                unique_id = str(row.get("Model Name", "")).translate(id_table)
                try:
                    cell_html = get_score_stars(float(cell), unique_id=unique_id)
                except Exception:
                    # Deliberate best-effort: any failure to build the star
                    # widget falls back to the raw cell text.
                    cell_html = str(cell)
                parts.append(f"<td>{cell_html}</td>")
            else:
                parts.append(f"<td>{format_leaderboard_cell(cell, col)}</td>")
        parts.append("</tr>\n")
    parts.append("</tbody>\n</table>")

    # Wrap in a scrollable div so the header can stay visible while scrolling.
    return f'<div style="overflow: auto;">{"".join(parts)}</div>'
def render_length_category_table(leaderboard_df=None):
    """
    Build a category dropdown plus an HTML table of length stats for it.

    When ``leaderboard_df`` is given, its "Overall", selected-category, "Type",
    "Model Type" and "Think" values are copied onto the per-category frame by
    matching on "Model Name"; the table is then ranked, colored and starred
    from that "Overall" value by ``render_length_category_html``.

    Args:
        leaderboard_df: Optional DataFrame with a "Model Name" column and any
            of the columns listed above. A "Med. Len." column, if present, is
            forwarded to the renderer as a name -> value mapping.

    Returns:
        dict with the created Gradio components under the keys
        "category_selector" and "table_html".
    """
    categories = get_length_category_list()
    default_category = categories[0] if categories else ""

    def get_merged_df(selected_category):
        """Return the category frame enriched with leaderboard columns."""
        df_cat = get_length_category_df(selected_category) if selected_category else None
        if leaderboard_df is None or df_cat is None:
            return df_cat

        df_merged = df_cat.copy()
        model_names = leaderboard_df["Model Name"]
        # Copy over only the columns the leaderboard actually has; indexing a
        # missing category column used to raise KeyError here.
        for col in ("Overall", selected_category, "Type", "Model Type", "Think"):
            if col in leaderboard_df.columns:
                lookup = dict(zip(model_names, leaderboard_df[col]))
                df_merged[col] = df_merged["Model Name"].map(lookup)

        # The renderer cannot rank without "Overall"; report "no data" instead.
        if "Overall" not in df_merged.columns:
            return None

        # Drop rows with a missing Overall or category value.
        required = [
            col for col in ("Overall", selected_category) if col in df_merged.columns
        ]
        return df_merged.dropna(subset=required)

    df = get_merged_df(default_category)

    # Prepare med_len_map if the leaderboard carries that column.
    med_len_map = None
    if leaderboard_df is not None and "Med. Len." in leaderboard_df.columns:
        med_len_map = dict(
            zip(leaderboard_df["Model Name"], leaderboard_df["Med. Len."])
        )

    with gr.Column():
        category_selector = gr.Dropdown(
            choices=categories,
            value=default_category,
            label="Select Category for Length Table",
            interactive=True,
        )
        initial_html = (
            render_length_category_html(df, med_len_map=med_len_map)
            if df is not None
            else "<div>No data available.</div>"
        )
        table_html = gr.HTML(value=initial_html, elem_id="length-category-table")

        def update_table(selected_category):
            """Re-render the table for the newly selected category."""
            merged = get_merged_df(selected_category)
            return render_length_category_html(merged, med_len_map=med_len_map)

        category_selector.change(
            fn=update_table,
            inputs=[category_selector],
            outputs=[table_html],
        )

    return {
        "category_selector": category_selector,
        "table_html": table_html,
    }
def _sort_desc_blank_first(frame, column):
    """Return frame sorted by column descending: blank strings first, NaN last."""
    values = frame[column]
    is_blank = values.map(lambda v: isinstance(v, str) and v.strip() == "").astype(bool)
    # Blank strings become +inf so they lead a descending sort; anything that
    # is neither numeric nor blank becomes NaN and therefore sorts last.
    sort_key = pd.to_numeric(values, errors="coerce").mask(is_blank, np.inf)
    ordered = frame.assign(sort_col_tmp=sort_key).sort_values(
        "sort_col_tmp", ascending=False, kind="mergesort"
    )
    return ordered.drop(columns=["sort_col_tmp"]).reset_index(drop=True)


def _with_rank_after_model_name(frame):
    """Return a copy of frame with a 1-based "Rank" column after "Model Name"."""
    ranked = frame.assign(Rank=np.arange(1, len(frame) + 1))
    cols = ranked.columns.tolist()
    if "Model Name" in cols:
        # Remove first, then look up the anchor, so a pre-existing "Rank"
        # column to the left of "Model Name" cannot shift the insert position.
        cols.remove("Rank")
        cols.insert(cols.index("Model Name") + 1, "Rank")
        ranked = ranked[cols]
    return ranked


def create_leaderboard_tab(df, key):
    """
    Build the leaderboard tab: filter checkboxes, sort dropdown, HTML table
    and CSV download button.

    Args:
        df: DataFrame to display. Must contain an "Overall" column.
        key: "Category" or "Language"; selects which columns are offered in
            the "Sort by" dropdown and is passed on to the table renderer.

    Returns:
        dict of the created components ("type_selector",
        "model_type_selector", "think_selector", "leaderboard_html_comp",
        "sort_col_dropdown", "df_state") plus the "unified_filter" callback,
        or None when ``df`` has no "Overall" column.
    """
    import re

    if "Overall" not in df.columns:
        return None

    df_state = gr.State(df)

    # Frame used for the initial render only; the state keeps the raw frame.
    # NOTE(review): unified_filter works on the raw state frame, so the 0-1 ->
    # 0-100 scaling and round(3) below are not applied after a filter or sort
    # change — confirm whether that is intended.
    df_badge = df.copy()

    # If Overall values are in the range 0~1, convert to 0~100. The check is
    # done on the numeric view so blank strings do not break max().
    overall_numeric = pd.to_numeric(df_badge["Overall"], errors="coerce")
    if overall_numeric.max() <= 1.0:
        df_badge["Overall"] = (overall_numeric * 100).where(
            overall_numeric.notna(), df_badge["Overall"]
        )

    # Group is never displayed.
    df_badge = df_badge.drop(columns=["Group"], errors="ignore")

    # Always start sorted by "Overall", with Rank right after "Model Name".
    df_badge = _with_rank_after_model_name(_sort_desc_blank_first(df_badge, "Overall"))

    # Columns offered in the "Sort by" dropdown.
    if key == "Language":
        # Language columns are the two-letter upper-case column names.
        language_columns = [
            col for col in df_badge.columns if re.fullmatch(r"[A-Z]{2}", col)
        ]
        available_sort_columns = [
            "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)"
        ] + language_columns
    else:
        category_columns = [
            "Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)",
            "Content Generation", "Editing", "Data Analysis", "Reasoning",
            "Hallucination", "Safety", "Repetition", "Summarization",
            "Translation", "Multi-Turn",
        ]
        available_sort_columns = [
            col for col in category_columns if col in df_badge.columns
        ]

    # NOTE(review): the sort dropdown is placed in the same row as the three
    # selectors — confirm this matches the intended layout.
    with gr.Row():
        # Type Selector (Open/Proprietary)
        type_choices = ["Open", "Proprietary"]
        type_selector = gr.CheckboxGroup(
            choices=type_choices,
            value=type_choices,
            label="Select Type (Open/Proprietary)"
        )
        # Model Type Selector (Instruct/Think/Hybrid)
        model_type_choices = ["Instruct", "Think", "Hybrid"]
        model_type_selector = gr.CheckboxGroup(
            choices=model_type_choices,
            value=model_type_choices,
            label="Select Model Type (Instruct/Think/Hybrid)"
        )
        # Think Selector (On/Off)
        think_choices = ["On", "Off"]
        think_selector = gr.CheckboxGroup(
            choices=think_choices,
            value=think_choices,
            label="Select Think Mode (On/Off)"
        )
        # Sort criterion (always descending)
        sort_col_dropdown = gr.Dropdown(
            choices=available_sort_columns,
            value="Overall",
            label="Sort by",
            interactive=True,
        )

    leaderboard_html = render_leaderboard_html(
        df_badge.round(3), overall_col="Overall", key=key
    )
    leaderboard_html_comp = gr.HTML(value=leaderboard_html, elem_id="leaderboard-table")

    def unified_filter(types, model_types, thinks, df, sort_col):
        """
        Filter ``df`` by the three selectors, sort it by ``sort_col`` in
        descending order and re-render the table.

        An empty selection for a selector means "no restriction". Always
        returns a 3-tuple (html, sort_col, top5_models) to match the three
        outputs registered on the change events.
        """
        filtered = df.copy()

        keep = pd.Series(True, index=filtered.index)
        selections = (("Type", types), ("Model Type", model_types), ("Think", thinks))
        for column, selected in selections:
            if column not in filtered.columns:
                continue
            filtered[column] = filtered[column].fillna("").astype(str)
            if selected:
                wanted = {str(value).lower().strip() for value in selected}
                keep &= filtered[column].str.lower().str.strip().isin(wanted)
        filtered = filtered[keep]

        if "Overall" not in filtered.columns:
            html = "<div>No 'Overall' column found in data. Please check your input data.</div>"
            return html, sort_col, []

        # A cleared or unknown sort column falls back to "Overall".
        if sort_col not in filtered.columns:
            sort_col = "Overall"

        filtered = _with_rank_after_model_name(_sort_desc_blank_first(filtered, sort_col))
        filtered = filtered.drop(columns=["Group"], errors="ignore")
        # Set last: the attribute does not survive DataFrame copies.
        # NOTE(review): presumably read by render_leaderboard_html — confirm.
        filtered._sort_col = sort_col

        # The frame is already in display order, so the top five are its head.
        top5_models = []
        if "Model Name" in filtered.columns:
            top5_models = filtered["Model Name"].tolist()[:5]
        return (
            render_leaderboard_html(filtered, overall_col="Overall", key=key),
            sort_col,
            top5_models,
        )

    def dataframe_to_csv(data):
        """Write ``data`` to truebench_<key>.csv and return that path."""
        # Copy so the frame held in the Gradio state is not modified.
        export_df = data.copy() if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
        if "Model Name" in export_df.columns:
            export_df["Model Name"] = export_df["Model Name"].apply(get_display_model_name)
        csv_path = f"truebench_{key}.csv"
        export_df.to_csv(csv_path, index=False)
        return csv_path

    # Download button, pushed to the right by an empty growing column.
    with gr.Row():
        with gr.Column(scale=1):
            pass  # Empty space
        with gr.Column(scale=0):
            download_btn = gr.DownloadButton(
                label="📥 Download to CSV",
                value=dataframe_to_csv,
                inputs=[df_state],
                visible=True,
                elem_classes=["custom-download-btn"]
            )

    # Add custom CSS
    custom_css = """
"""
    gr.HTML(custom_css)

    # Every control re-runs the same filter. The third output only absorbs the
    # top5_models return value, so one shared State is enough.
    top5_state = gr.State()
    filter_inputs = [
        type_selector, model_type_selector, think_selector, df_state, sort_col_dropdown
    ]
    filter_outputs = [leaderboard_html_comp, sort_col_dropdown, top5_state]
    for control in (sort_col_dropdown, type_selector, model_type_selector, think_selector):
        control.change(fn=unified_filter, inputs=filter_inputs, outputs=filter_outputs)

    return {
        "type_selector": type_selector,
        "model_type_selector": model_type_selector,
        "think_selector": think_selector,
        "leaderboard_html_comp": leaderboard_html_comp,
        "sort_col_dropdown": sort_col_dropdown,
        "df_state": df_state,
        "unified_filter": unified_filter  # Exposed for direct external call
    }