"""Gradio building blocks for the leaderboard tab and the length-category table.

NOTE(review): this module was rebuilt from a copy whose line structure was
collapsed and whose HTML string literals had every tag stripped.  Control flow,
column names, labels and colour values are taken from that copy; the concrete
markup (tag names, CSS classes, inline styles, header tooltips, the contents of
``custom_css``) could not be recovered and is a minimal stand-in.  Restore the
exact markup from version control before shipping.
"""

import re
from html import escape

import gradio as gr
import numpy as np
import pandas as pd

from src.data_utils import get_length_category_df, get_length_category_list
from src.display.formatting import get_display_model_name, render_leaderboard_html

# Placeholder shown whenever there is nothing to render.
_NO_DATA_HTML = "<div>No data available.</div>"

# Columns whose values are rendered as badges next to the model name.
_BADGE_COLUMNS = ("Type", "Model Type", "Think")

# Inline style of the model name for ranks 1-3 (gold / silver / bronze).
_RANK_STYLES = {
    1: "color: #ffd700; font-weight: bold; text-shadow: 0 0 4px #fff2;",
    2: "color: #b0b0b0; font-weight: bold;",
    3: "color: #cd7f32; font-weight: bold;",
}
_DEFAULT_NAME_STYLE = "color: #fff; font-weight: 600;"

# Maps " ", "-", "(" and ")" to "_" when deriving an id from a model name.
_UNIQUE_ID_TABLE = str.maketrans(" -()", "____")


def _place_rank_after_model_name(frame):
    """Return *frame* with its "Rank" column moved right after "Model Name".

    The frame is returned unchanged when either column is missing.
    """
    cols = frame.columns.tolist()
    if "Model Name" not in cols or "Rank" not in cols:
        return frame
    # The target index is taken before "Rank" is removed, as in the original.
    model_name_idx = cols.index("Model Name")
    cols.remove("Rank")
    cols.insert(model_name_idx + 1, "Rank")
    return frame[cols]


def render_length_category_html(df, med_len_map=None):
    """Render the length-category table as an HTML string.

    The model name is coloured by rank (gold/silver/bronze for ranks 1-3) and
    is followed by the Type / Model Type / Think badges.  There is no visible
    rank column.  Column order is: Model Name, Overall, Med. Len.,
    Med. Resp. Len., the first known length category present, then every
    remaining column.  "Rank_Internal", "Comment", "Group" and "Link" are
    never shown as columns.

    Args:
        df: DataFrame with at least "Model Name" and "Overall" columns.
        med_len_map: optional mapping of model name -> median length; when
            given, a "Med. Len." column is filled from it.

    Returns:
        An HTML string; a "No data available." placeholder when *df* is
        ``None`` or empty.
    """
    if df is None or df.empty:
        return _NO_DATA_HTML

    # Kept function-local, as in the original; resolved once per call instead
    # of once per rendered row.
    from constants import NUMERIC_COLS_CATEGORY, NUMERIC_INT_COLS_CATEGORY
    from src.display.formatting import (
        get_model_type_badge,
        get_score_stars,
        get_think_badge,
        get_type_badge,
    )

    def format_leaderboard_cell(cell, col):
        """Format one plain cell: integer, two decimals, or ``str(cell)``."""
        # NaN and blank strings are passed through untouched.
        if pd.isna(cell) or (isinstance(cell, str) and cell.strip() == ""):
            return cell
        try:
            if col in NUMERIC_INT_COLS_CATEGORY:
                return str(int(round(float(cell))))
            if col in NUMERIC_COLS_CATEGORY:
                return "{:.2f}".format(float(cell))
        except (TypeError, ValueError, OverflowError):
            # Not numeric after all: fall back to the raw text.
            pass
        return str(cell)

    df = df.copy()
    # Two-pass sort kept from the original: first by "is empty string", then a
    # stable descending sort on the values themselves.
    df = df.sort_values("Overall", key=lambda s: (s == "").astype(int))
    df = df.sort_values("Overall", ascending=False, kind="mergesort").reset_index(drop=True)
    # Rows without an Overall value get rank 0 (no medal colour) instead of
    # making astype(int) raise on NaN.
    df["Rank_Internal"] = (
        df["Overall"].rank(method="min", ascending=False).fillna(0).astype(int)
    )

    # Badge rendering needs these columns to exist.
    for badge_col in _BADGE_COLUMNS:
        if badge_col not in df.columns:
            df[badge_col] = "unknown"

    if med_len_map is not None:
        df["Med. Len."] = df["Model Name"].map(med_len_map)

    # NOTE(review): the badge columns are NOT excluded here, so they are also
    # rendered as ordinary columns.  The old comment claimed otherwise; the
    # code's behaviour is kept.
    hidden_cols = {"Rank_Internal", "Comment", "Group", "Link"}
    base_cols = [col for col in df.columns if col not in hidden_cols]
    category_col = next(
        (col for col in get_length_category_list() if col in base_cols), None
    )

    leading = ("Model Name", "Overall", "Med. Len.", "Med. Resp. Len.")
    display_cols = [col for col in leading if col in base_cols]
    if category_col:
        display_cols.append(category_col)
    for col in base_cols:
        if col not in display_cols:
            display_cols.append(col)

    # NOTE(review): the header cells for "Model Name", "Med. Len." and
    # "Med. Resp. Len." originally carried an info icon / tooltip whose markup
    # and text are not recoverable from the damaged source.
    parts = ["<table>\n<thead><tr>"]
    parts.extend(f"<th>{col}</th>" for col in display_cols)
    parts.append("</tr></thead>\n<tbody>\n")

    for _, row in df.iterrows():
        parts.append("<tr>")
        for col in display_cols:
            cell = row[col]
            if col == "Model Name":
                style = _RANK_STYLES.get(int(row["Rank_Internal"]), _DEFAULT_NAME_STYLE)
                badge_html = (
                    get_type_badge(row["Type"])
                    + get_model_type_badge(row["Model Type"])
                    + get_think_badge(row["Think"])
                )
                display_name = get_display_model_name(str(cell))

                # Tooltip: a non-blank "Comment" becomes the title attribute.
                # It is escaped so quotes or angle brackets in the comment
                # cannot break out of the attribute.
                comment = row.get("Comment")
                comment_value = str(comment).strip() if pd.notna(comment) else ""
                title_attribute = (
                    f' title="{escape(comment_value, quote=True)}"' if comment_value else ""
                )

                # Link: a non-blank "Link" wraps the name in an anchor.
                link = row.get("Link")
                link_value = link if pd.notna(link) and str(link).strip() != "" else None
                if link_value:
                    href = escape(str(link_value), quote=True)
                    clickable_name = (
                        f'<a href="{href}" target="_blank" rel="noopener noreferrer">'
                        f"{display_name}</a>"
                    )
                else:
                    clickable_name = display_name

                parts.append(
                    f'<td><span style="{style}"{title_attribute}>{clickable_name}</span>'
                    f"{badge_html}</td>"
                )
            elif col == "Overall":
                # Star rendering is best-effort: any failure (non-numeric
                # score, non-string model name, helper error) shows raw text.
                try:
                    unique_id = row.get("Model Name", None).translate(_UNIQUE_ID_TABLE)
                    cell_html = get_score_stars(float(cell), unique_id=unique_id)
                except Exception:
                    cell_html = str(cell)
                parts.append(f"<td>{cell_html}</td>")
            else:
                parts.append(f"<td>{format_leaderboard_cell(cell, col)}</td>")
        parts.append("</tr>\n")
    parts.append("</tbody></table>")

    table_html = "".join(parts)
    # Wrap in a scrollable div for the sticky header.
    return f'<div style="overflow: auto;">\n{table_html}\n</div>'


def render_length_category_table(leaderboard_df=None):
    """Create a category dropdown plus the length table for that category.

    "Overall", the selected category and the Type / Model Type / Think columns
    are taken from *leaderboard_df* (matched on "Model Name") so that ranking,
    colouring and stars agree with the main leaderboard.

    Args:
        leaderboard_df: optional main leaderboard DataFrame.  When ``None`` the
            per-category frame is rendered as returned by
            ``get_length_category_df``.

    Returns:
        dict with the created components under "category_selector" and
        "table_html".
    """
    categories = get_length_category_list()
    default_category = categories[0] if categories else ""

    def get_merged_df(selected_category):
        """Return the category frame enriched from the leaderboard, or None."""
        df_cat = get_length_category_df(selected_category) if selected_category else None
        if leaderboard_df is None or df_cat is None:
            return df_cat

        model_names = leaderboard_df["Model Name"]

        def lookup(column):
            # model name -> value of *column* in the leaderboard
            return dict(zip(model_names, leaderboard_df[column]))

        overall_map = lookup("Overall")
        category_map = lookup(selected_category)

        df_merged = df_cat.copy()
        df_merged["Overall"] = df_merged["Model Name"].map(overall_map)
        df_merged[selected_category] = df_merged["Model Name"].map(category_map)
        for badge_col in _BADGE_COLUMNS:
            if badge_col in leaderboard_df.columns:
                df_merged[badge_col] = df_merged["Model Name"].map(lookup(badge_col))

        # Models missing either score on the leaderboard are not shown.
        has_scores = df_merged["Overall"].notna() & df_merged[selected_category].notna()
        return df_merged[has_scores]

    med_len_map = None
    if leaderboard_df is not None and "Med. Len." in leaderboard_df.columns:
        med_len_map = dict(zip(leaderboard_df["Model Name"], leaderboard_df["Med. Len."]))

    def update_table(selected_category):
        """Re-render the table for the newly selected category."""
        return render_length_category_html(
            get_merged_df(selected_category), med_len_map=med_len_map
        )

    with gr.Column():
        category_selector = gr.Dropdown(
            choices=categories,
            value=default_category,
            label="Select Category for Length Table",
            interactive=True,
        )
        # The renderer itself returns the "No data available." placeholder
        # for a missing frame.
        table_html = gr.HTML(
            value=update_table(default_category),
            elem_id="length-category-table",
        )

    category_selector.change(
        fn=update_table,
        inputs=[category_selector],
        outputs=[table_html],
    )

    return {
        "category_selector": category_selector,
        "table_html": table_html,
    }


def create_leaderboard_tab(df, key):
    """Build the filterable, sortable leaderboard view for one tab.

    Args:
        df: DataFrame to display; must contain an "Overall" column.
        key: "Category" or "Language"; selects the sortable columns and is
            forwarded to ``render_leaderboard_html`` and used in the CSV name.

    Returns:
        dict with the created components ("type_selector",
        "model_type_selector", "think_selector", "leaderboard_html_comp",
        "sort_col_dropdown", "df_state") and the "unified_filter" callback,
        or ``None`` when *df* has no "Overall" column.
    """
    if "Overall" not in df.columns:
        return None

    df_state = gr.State(df)

    # Frame used for the initial render.
    # NOTE(review): callbacks start again from ``df_state`` (the unscaled,
    # unrounded frame), so the 0-1 -> 0-100 scaling and ``round(3)`` below only
    # affect the first render.  Kept as in the original; confirm it is intended.
    df_badge = df.copy()
    if df_badge["Overall"].max() <= 1.0:
        df_badge["Overall"] = df_badge["Overall"] * 100
    df_badge = df_badge.drop(columns=["Group"], errors="ignore")

    # Same two-pass descending sort as the length table.
    df_badge = df_badge.sort_values("Overall", key=lambda s: (s == "").astype(int))
    df_badge = df_badge.sort_values(
        "Overall", ascending=False, kind="mergesort"
    ).reset_index(drop=True)
    df_badge["Rank"] = df_badge.index + 1
    df_badge = _place_rank_after_model_name(df_badge)

    # NOTE(review): the original indentation is lost; the three checkbox groups
    # are assumed to share this row and the sort dropdown to sit below it.
    with gr.Row():
        type_choices = ["Open", "Proprietary"]
        type_selector = gr.CheckboxGroup(
            choices=type_choices,
            value=type_choices,
            label="Select Type (Open/Proprietary)",
        )
        model_type_choices = ["Instruct", "Think", "Hybrid"]
        model_type_selector = gr.CheckboxGroup(
            choices=model_type_choices,
            value=model_type_choices,
            label="Select Model Type (Instruct/Think/Hybrid)",
        )
        think_choices = ["On", "Off"]
        think_selector = gr.CheckboxGroup(
            choices=think_choices,
            value=think_choices,
            label="Select Think Mode (On/Off)",
        )

    # Sortable columns (always descending).  Language tabs add every
    # two-capital-letter column ("VI" already matches that pattern).
    common_sort_columns = ["Overall", "Med. Len.", "Med. Resp. Len.", "Parameter Size (B)"]
    if key == "Language":
        language_columns = [
            col for col in df_badge.columns if re.fullmatch(r"[A-Z]{2}", col)
        ]
        candidate_columns = common_sort_columns + language_columns
    else:
        candidate_columns = common_sort_columns + [
            "Content Generation", "Editing", "Data Analysis", "Reasoning",
            "Hallucination", "Safety", "Repetition", "Summarization",
            "Translation", "Multi-Turn",
        ]
    # Only offer columns that exist, for both tab kinds, so a choice can never
    # raise KeyError in the callback.
    available_sort_columns = [col for col in candidate_columns if col in df_badge.columns]

    sort_col_dropdown = gr.Dropdown(
        choices=available_sort_columns,
        value="Overall",
        label="Sort by",
        interactive=True,
    )

    leaderboard_html = render_leaderboard_html(
        df_badge.round(3), overall_col="Overall", key=key
    )
    leaderboard_html_comp = gr.HTML(value=leaderboard_html, elem_id="leaderboard-table")

    def unified_filter(types, model_types, thinks, df, sort_col):
        """Filter by the three selectors, sort descending and re-render.

        An empty selection for a selector means "do not filter on it".

        Returns:
            (html, sort_col, top5_models) - always three values, matching the
            three outputs every listener below declares.
        """
        filtered = df.copy()
        for column, selected in (
            ("Type", types),
            ("Model Type", model_types),
            ("Think", thinks),
        ):
            if column not in filtered.columns:
                continue
            # Normalised even when not filtering, as the original did.
            filtered = filtered.assign(**{column: filtered[column].fillna("").astype(str)})
            if selected:
                wanted = {str(value).lower().strip() for value in selected}
                filtered = filtered[filtered[column].str.lower().str.strip().isin(wanted)]

        if "Overall" not in filtered.columns:
            message = (
                "<div>No 'Overall' column found in data. "
                "Please check your input data.</div>"
            )
            # Previously returned only two values, which Gradio rejects for a
            # three-output listener.
            return message, sort_col, []

        # A cleared or stale dropdown value falls back to the default sort.
        if sort_col not in filtered.columns:
            sort_col = "Overall"

        # Empty strings become +inf so they sort to the top in descending order.
        sort_key = filtered[sort_col].replace("", np.inf).astype(float)
        filtered = (
            filtered.assign(sort_col_tmp=sort_key)
            .sort_values("sort_col_tmp", ascending=False, kind="mergesort")
            .reset_index(drop=True)
            .drop(columns=["sort_col_tmp"])
        )

        filtered["Rank"] = filtered.index + 1
        filtered = _place_rank_after_model_name(filtered)
        filtered = filtered.drop(columns=["Group"], errors="ignore")
        # Presumably read by render_leaderboard_html to mark the sorted
        # column - TODO confirm.
        filtered._sort_col = sort_col

        # The frame is already in final order, so the top five are its head.
        top5_models = []
        if sort_col in filtered.columns and "Model Name" in filtered.columns:
            top5_models = filtered["Model Name"].tolist()[:5]

        return (
            render_leaderboard_html(filtered, overall_col="Overall", key=key),
            sort_col,
            top5_models,
        )

    def dataframe_to_csv(data):
        """Write *data* to ``truebench_{key}.csv`` and return that path."""
        # Work on a copy so the frame held in state is not modified.
        export = data.copy() if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
        if "Model Name" in export.columns:
            export["Model Name"] = export["Model Name"].apply(get_display_model_name)
        csv_path = f"truebench_{key}.csv"
        export.to_csv(csv_path, index=False)
        return csv_path

    with gr.Row():
        with gr.Column(scale=1):
            pass  # Empty spacer column
        with gr.Column(scale=0):
            download_btn = gr.DownloadButton(
                label="📥 Download to CSV",
                value=dataframe_to_csv,
                inputs=[df_state],
                visible=True,
                elem_classes=["custom-download-btn"],
            )

    # NOTE(review): the stylesheet that belonged here was stripped from the
    # damaged source; restore it from version control.
    custom_css = """ """
    gr.HTML(custom_css)

    # Any selector or sort change re-runs the same callback.  Each listener
    # gets its own throw-away State to receive top5_models.
    for trigger in (sort_col_dropdown, type_selector, model_type_selector, think_selector):
        trigger.change(
            fn=unified_filter,
            inputs=[type_selector, model_type_selector, think_selector, df_state, sort_col_dropdown],
            outputs=[leaderboard_html_comp, sort_col_dropdown, gr.State()],
        )

    return {
        "type_selector": type_selector,
        "model_type_selector": model_type_selector,
        "think_selector": think_selector,
        "leaderboard_html_comp": leaderboard_html_comp,
        "sort_col_dropdown": sort_col_dropdown,
        "df_state": df_state,
        "unified_filter": unified_filter,  # Exposed for direct external call
    }