Jerrycool committed on
Commit e4014fe · verified · 1 Parent(s): cc4532a

Update app.py

Files changed (1)
  1. app.py +156 -62
app.py CHANGED
Old version of app.py (lines removed in this commit are prefixed with "-"):

@@ -2,48 +2,56 @@ import gradio as gr
  import pandas as pd
  from apscheduler.schedulers.background import BackgroundScheduler
  # Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
- # from huggingface_hub import snapshot_download, HfApi
- from src.about import ( # Assuming these still exist and are relevant for other tabs
      CITATION_BUTTON_LABEL,
      CITATION_BUTTON_TEXT,
-     EVALUATION_QUEUE_TEXT,
      INTRODUCTION_TEXT,
      LLM_BENCHMARKS_TEXT,
      TITLE,
  )
- from src.display.css_html_js import custom_css # Keep custom CSS
- # Removed utils imports related to the old leaderboard
- # from src.display.utils import (...)
  from src.envs import REPO_ID # Keep if needed for restart_space or other functions
- # Removed constants related to old data paths and repos if not needed elsewhere
- # from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
- # Removed old data processing functions
- # from src.populate import get_evaluation_queue_df, get_leaderboard_df
- from src.submission.submit import add_new_eval # Keep submission logic

  # --- Elo Leaderboard Configuration ---
- # Data from the table provided by the user
  data = [
-     {'model': 'gpt-4o-mini', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
-     {'model': 'gpt-4o', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
-     {'model': 'o3-mini', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096},
-     # Renamed 'DeepSeek-v3' to match previous list - adjust if needed
-     {'model': 'deepseek-v3', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
-     # Renamed 'DeepSeek-r1' to match previous list - adjust if needed
-     {'model': 'deepseek-r1', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
-     # Renamed 'Gemini-2.0-Flash' to match previous list - adjust if needed
-     {'model': 'gemini-2.0-flash', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
-     # Renamed 'Gemini-2.0-Pro' to match previous list - adjust if needed
-     {'model': 'gemini-2.0-pro', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
-     # Renamed 'Gemini-2.5-Pro' to match previous list - adjust if needed
-     {'model': 'gemini-2.5-pro', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
  ]

  # Create a master DataFrame
  master_df = pd.DataFrame(data)

  # Define categories for selection (user-facing)
- CATEGORIES = ["MLE-Lite", "Tabular", "NLP", "CV", "Overall"]
  DEFAULT_CATEGORY = "Overall" # Set a default category

  # Map user-facing categories to DataFrame column names
@@ -58,33 +66,58 @@ category_to_column = {
  # --- Helper function to update leaderboard ---
  def update_leaderboard(category):
      """
-     Selects the relevant columns for the category, renames the score column
-     to 'Elo Score', sorts by score descending, and returns the DataFrame.
      """
      score_column = category_to_column.get(category)
      if score_column is None or score_column not in master_df.columns:
-         # Fallback if category or column is invalid
          print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
          score_column = category_to_column[DEFAULT_CATEGORY]
          if score_column not in master_df.columns: # Check fallback column too
-             return pd.DataFrame({"Model": [], "Elo Score": []}) # Return empty if still invalid

-     # Select model and the specific score column
-     df = master_df[['model', score_column]].copy()

      # Rename the score column to 'Elo Score' for consistent display
      df.rename(columns={score_column: 'Elo Score'}, inplace=True)

-     # Sort by 'Elo Score' descending
-     df.sort_values(by='Elo Score', ascending=False, inplace=True)

-     # Reset index for cleaner display (optional)
-     df.reset_index(drop=True, inplace=True)

      return df

  # --- Mock/Placeholder functions/data for other tabs ---
- # (Same as previous version - providing empty data)
  print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
  finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
  running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
@@ -94,38 +127,55 @@ EVAL_TYPES = ["str", "str", "str", "str"] # Define for the dataframe types


  # --- Keep restart function if relevant ---
- # (Same as previous version)
  def restart_space():
      print(f"Attempting to restart space: {REPO_ID}")
-     # Replace with your actual space restart mechanism if needed

  # --- Gradio App Definition ---
- demo = gr.Blocks(css=custom_css)

  with demo:
      gr.HTML(TITLE)
      gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

      with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
              with gr.Column():
-                 gr.Markdown("## Model Elo Rankings") # New title for the section
                  category_selector = gr.Radio(
                      choices=CATEGORIES,
-                     label="Select Category to Sort By", # Updated label
-                     value=DEFAULT_CATEGORY, # Default selection
                      interactive=True,
-                     container=False,
                  )
                  leaderboard_df_component = gr.Dataframe(
                      # Initialize with sorted data for the default category
                      value=update_leaderboard(DEFAULT_CATEGORY),
-                     headers=["Model", "Elo Score"],
-                     datatype=["str", "number"],
                      interactive=False,
-                     # Adjust row count based on the number of models
                      row_count=(len(master_df), "fixed"),
-                     col_count=(2, "fixed"),
                  )
                  # Link the radio button change to the update function
                  category_selector.change(
@@ -134,20 +184,60 @@ with demo:
                      outputs=leaderboard_df_component
                  )

-         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-             # (Content unchanged)
              gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

-     with gr.Row():
-         with gr.Accordion("📙 Citation", open=False):
-             # (Content unchanged)
-             citation_button = gr.Textbox(
-                 value=CITATION_BUTTON_TEXT,
-                 label=CITATION_BUTTON_LABEL,
-                 lines=20,
-                 elem_id="citation-button",
-                 show_copy_button=True,
-             )

  # --- Keep scheduler if relevant ---
  # scheduler = BackgroundScheduler()
@@ -155,4 +245,8 @@ with demo:
  # scheduler.start()

  # --- Launch the app ---
- demo.launch()
New version of app.py (lines added in this commit are prefixed with "+"):

  import pandas as pd
  from apscheduler.schedulers.background import BackgroundScheduler
  # Removed Hugging Face Hub imports as they are not needed for the simplified leaderboard
+
+ # --- Make sure these imports work relative to your file structure ---
+ # Option 1: If src is a directory in the same folder as your script:
+ from src.about import (
      CITATION_BUTTON_LABEL,
      CITATION_BUTTON_TEXT,
+     EVALUATION_QUEUE_TEXT, # Keep if used by commented-out submit tab
      INTRODUCTION_TEXT,
      LLM_BENCHMARKS_TEXT,
      TITLE,
  )
+ from src.display.css_html_js import custom_css
  from src.envs import REPO_ID # Keep if needed for restart_space or other functions
+ from src.submission.submit import add_new_eval # Keep if using the submit tab
+
+ # Option 2: If you don't have these files, define placeholders (REMOVE THIS if using Option 1)
+ # print("Warning: Using placeholder values for src module imports.")
+ # CITATION_BUTTON_LABEL="Citation"
+ # CITATION_BUTTON_TEXT="Please cite us if you use this benchmark..."
+ # EVALUATION_QUEUE_TEXT="Current evaluation queue:"
+ # INTRODUCTION_TEXT="Welcome to the MLE-Dojo Benchmark Leaderboard."
+ # LLM_BENCHMARKS_TEXT="Information about the benchmarks..."
+ # TITLE="<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
+ # custom_css=""
+ # REPO_ID="your/space-id" # Replace with actual ID if needed
+ # def add_new_eval(*args): return "Submission placeholder."
+ # --- End Placeholder Definitions ---
+

  # --- Elo Leaderboard Configuration ---
+ # Enhanced data with Rank (placeholder), Organizer, License, and URL
+ # !!! IMPORTANT: Replace placeholder URLs with actual model/project pages. !!!
+ # Verify organizer and license information for accuracy.
  data = [
+     {'model_name': 'gpt-4o-mini', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 753, 'Tabular_Elo': 839, 'NLP_Elo': 758, 'CV_Elo': 754, 'Overall': 778},
+     {'model_name': 'gpt-4o', 'url': 'https://openai.com/index/hello-gpt-4o/', 'organizer': 'OpenAI', 'license': 'Proprietary', 'MLE-Lite_Elo': 830, 'Tabular_Elo': 861, 'NLP_Elo': 903, 'CV_Elo': 761, 'Overall': 841},
+     {'model_name': 'o3-mini', 'url': 'https://placeholder.url/o3-mini', 'organizer': 'Unknown', 'license': 'Unknown', 'MLE-Lite_Elo': 1108, 'Tabular_Elo': 1019, 'NLP_Elo': 1056, 'CV_Elo': 1207, 'Overall': 1096}, # Fill details later
+     {'model_name': 'deepseek-v3', 'url': 'https://deepseek.com/', 'organizer': 'DeepSeek AI', 'license': 'DeepSeek License', 'MLE-Lite_Elo': 1004, 'Tabular_Elo': 1015, 'NLP_Elo': 1028, 'CV_Elo': 1067, 'Overall': 1023},
+     {'model_name': 'deepseek-r1', 'url': 'https://deepseek.com/', 'organizer': 'DeepSeek AI', 'license': 'DeepSeek License', 'MLE-Lite_Elo': 1137, 'Tabular_Elo': 1053, 'NLP_Elo': 1103, 'CV_Elo': 1083, 'Overall': 1100},
+     {'model_name': 'gemini-2.0-flash', 'url': 'https://deepmind.google/technologies/gemini/flash/', 'organizer': 'Google', 'license': 'Proprietary (API)', 'MLE-Lite_Elo': 847, 'Tabular_Elo': 923, 'NLP_Elo': 860, 'CV_Elo': 978, 'Overall': 895},
+     {'model_name': 'gemini-2.0-pro', 'url': 'https://deepmind.google/technologies/gemini/#introduction', 'organizer': 'Google', 'license': 'Proprietary (API)', 'MLE-Lite_Elo': 1064, 'Tabular_Elo': 1139, 'NLP_Elo': 1028, 'CV_Elo': 973, 'Overall': 1054},
+     {'model_name': 'gemini-2.5-pro', 'url': 'https://deepmind.google/technologies/gemini/2-5-pro/', 'organizer': 'Google', 'license': 'Proprietary (API)', 'MLE-Lite_Elo': 1257, 'Tabular_Elo': 1150, 'NLP_Elo': 1266, 'CV_Elo': 1177, 'Overall': 1214},
  ]

  # Create a master DataFrame
+ # Note: Columns 'organizer' and 'license' are created in lowercase here.
  master_df = pd.DataFrame(data)

  # Define categories for selection (user-facing)
+ CATEGORIES = ["Overall", "MLE-Lite", "Tabular", "NLP", "CV"] # Overall first
  DEFAULT_CATEGORY = "Overall" # Set a default category

  # Map user-facing categories to DataFrame column names
 
  # --- Helper function to update leaderboard ---
  def update_leaderboard(category):
      """
+     Selects relevant columns, sorts by the chosen category's Elo score,
+     adds Rank, formats model name as a link, and returns the DataFrame.
      """
      score_column = category_to_column.get(category)
      if score_column is None or score_column not in master_df.columns:
          print(f"Warning: Invalid category '{category}' or column '{score_column}'. Falling back to default.")
          score_column = category_to_column[DEFAULT_CATEGORY]
          if score_column not in master_df.columns: # Check fallback column too
+             # Return empty df with correct columns if still invalid
+             # Use lowercase keys here consistent with master_df for the empty case
+             return pd.DataFrame({
+                 "Rank": [],
+                 "Model": [],
+                 "organizer": [], # lowercase
+                 "license": [], # lowercase
+                 "Elo Score": []
+             })
+
+     # Select base columns + the score column for sorting
+     # Ensure 'organizer' and 'license' are selected correctly (lowercase)
+     cols_to_select = ['model_name', 'url', 'organizer', 'license', score_column]
+     df = master_df[cols_to_select].copy()
+
+     # Sort by the selected 'Elo Score' descending
+     df.sort_values(by=score_column, ascending=False, inplace=True)
+
+     # Add Rank based on the sorted order
+     df.reset_index(drop=True, inplace=True)
+     df.insert(0, 'Rank', df.index + 1)

+     # Format Model Name as HTML Hyperlink
+     # The resulting column name will be 'Model' (capitalized)
+     df['Model'] = df.apply(
+         lambda row: f"<a href='{row['url'] if pd.notna(row['url']) else '#'}' target='_blank' style='color: #007bff; text-decoration: none;'>{row['model_name']}</a>",
+         axis=1
+     )

      # Rename the score column to 'Elo Score' for consistent display
      df.rename(columns={score_column: 'Elo Score'}, inplace=True)

+     # Select and reorder columns for final display using the ACTUAL column names in df
+     # Use lowercase 'organizer' and 'license' here because they haven't been renamed.
+     final_columns = ["Rank", "Model", "organizer", "license", "Elo Score"]
+     df = df[final_columns]

+     # Note: The DataFrame returned now has columns:
+     # 'Rank', 'Model', 'organizer', 'license', 'Elo Score'

      return df

  # --- Mock/Placeholder functions/data for other tabs ---
+ # (If the Submit tab is used, ensure these variables are appropriately populated or handled)
  print("Warning: Evaluation queue data fetching is disabled/mocked due to leaderboard changes.")
  finished_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
  running_eval_queue_df = pd.DataFrame(columns=["Model", "Status", "Requested", "Started"])
 
  # --- Keep restart function if relevant ---
  def restart_space():
+     # Make sure REPO_ID is correctly defined/imported if this function is used
      print(f"Attempting to restart space: {REPO_ID}")
+     # Replace with your actual space restart mechanism if needed (e.g., HfApi().restart_space(REPO_ID))

  # --- Gradio App Definition ---
+ # Add custom CSS rules here or ensure custom_css is imported correctly
+ # Example CSS rules you might want in your custom_css:
+ # table { width: 100%; border-collapse: collapse; }
+ # th, td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; } /* Allow wrapping */
+ # th { background-color: #f2f2f2; font-weight: bold; }
+ # tr:nth-child(even) { background-color: #f9f9f9; }
+ # tr:hover { background-color: #e9e9e9; }
+ # td a { color: #007bff; text-decoration: none; }
+ # td a:hover { text-decoration: underline; }
+
+ # Use a theme for better default styling
+ demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())

  with demo:
+     # Use the TITLE variable imported or defined above
      gr.HTML(TITLE)
+     # Use the INTRODUCTION_TEXT variable imported or defined above
      gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

      with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         with gr.TabItem("🏅 MLE-Dojo Benchmark", elem_id="llm-benchmark-tab-table", id=0):
              with gr.Column():
+                 gr.Markdown("## Model Elo Rankings by Category")
                  category_selector = gr.Radio(
                      choices=CATEGORIES,
+                     label="Select Category:",
+                     value=DEFAULT_CATEGORY,
                      interactive=True,
                  )
                  leaderboard_df_component = gr.Dataframe(
                      # Initialize with sorted data for the default category
                      value=update_leaderboard(DEFAULT_CATEGORY),
+                     # Headers for DISPLAY remain capitalized
+                     headers=["Rank", "Model", "Organizer", "License", "Elo Score"],
+                     # Datatype maps to the final df columns: Rank, Model, organizer, license, Elo Score
+                     datatype=["number", "html", "str", "str", "number"],
                      interactive=False,
+                     # --- FIX APPLIED: Removed unsupported 'height' argument ---
+                     # row_count determines the number of rows to display
                      row_count=(len(master_df), "fixed"),
+                     col_count=(5, "fixed"),
+                     wrap=True, # Allow text wrapping
+                     elem_id="leaderboard-table" # CSS hook for custom styling
                  )
                  # Link the radio button change to the update function
                  category_selector.change(

                      outputs=leaderboard_df_component
                  )

+         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-about", id=1):
+             # Use the LLM_BENCHMARKS_TEXT variable imported or defined above
              gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

+         # --- Submit Tab (Commented out as in original request) ---
+         # Make sure EVALUATION_QUEUE_TEXT and add_new_eval are imported/defined if uncommented
+         # with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-submit", id=2):
+         #     with gr.Column():
+         #         with gr.Row():
+         #             gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text") # Requires import/definition
+         #         with gr.Column():
+         #             with gr.Accordion(f"✅ Finished Evaluations ({len(finished_eval_queue_df)})", open=False):
+         #                 finished_eval_table = gr.components.Dataframe(
+         #                     value=finished_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
+         #                 )
+         #             with gr.Accordion(f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})", open=False):
+         #                 running_eval_table = gr.components.Dataframe(
+         #                     value=running_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
+         #                 )
+         #             with gr.Accordion(f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})", open=False):
+         #                 pending_eval_table = gr.components.Dataframe(
+         #                     value=pending_eval_queue_df, headers=EVAL_COLS, datatype=EVAL_TYPES, row_count=5,
+         #                 )
+         #         with gr.Row():
+         #             gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
+         #         with gr.Row():
+         #             with gr.Column():
+         #                 model_name_textbox = gr.Textbox(label="Model name (on Hugging Face Hub)")
+         #                 revision_name_textbox = gr.Textbox(label="Revision / Commit Hash", placeholder="main")
+         #                 model_type = gr.Dropdown(choices=["Type A", "Type B", "Type C"], label="Model type", multiselect=False, value=None, interactive=True) # Example choices
+         #             with gr.Column():
+         #                 precision = gr.Dropdown(choices=["float16", "bfloat16", "float32", "int8", "auto"], label="Precision", multiselect=False, value="auto", interactive=True)
+         #                 weight_type = gr.Dropdown(choices=["Original", "Adapter", "Delta"], label="Weights type", multiselect=False, value="Original", interactive=True)
+         #                 base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
+         #             submit_button = gr.Button("Submit Eval")
+         #             submission_result = gr.Markdown()
+         #             # Ensure add_new_eval is correctly imported/defined and handles these inputs
+         #             submit_button.click(
+         #                 add_new_eval, # Requires import/definition
+         #                 [ model_name_textbox, base_model_name_textbox, revision_name_textbox, precision, weight_type, model_type, ],
+         #                 submission_result,
+         #             )
+
+
+     # --- Citation Row (at the bottom, outside Tabs) ---
+     with gr.Accordion("📙 Citation", open=False):
+         # Use the CITATION_BUTTON_TEXT and CITATION_BUTTON_LABEL variables imported or defined above
+         citation_button = gr.Textbox(
+             value=CITATION_BUTTON_TEXT,
+             label=CITATION_BUTTON_LABEL,
+             lines=10,
+             elem_id="citation-button",
+             show_copy_button=True,
+         )
 
242
  # --- Keep scheduler if relevant ---
243
  # scheduler = BackgroundScheduler()
 
245
  # scheduler.start()
246
 
247
  # --- Launch the app ---
248
+ # Ensures the app launches only when the script is run directly
249
+ if __name__ == "__main__":
250
+ # Ensure you have installed necessary libraries: pip install gradio pandas apscheduler
251
+ # Make sure your src module files (about.py etc.) are accessible OR use the placeholder definitions above.
252
+ demo.launch()
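
A note on the two import options in the new file: Option 1 (the real src package) and Option 2 (the commented-out placeholders) can also be folded into a single try/except fallback, so the app still starts when the src modules are missing. This is only a sketch based on the commit; the placeholder strings are illustrative, not the real contents of src/about.py.

try:
    # Option 1: the helper modules shipped with the Space
    from src.about import (
        CITATION_BUTTON_LABEL,
        CITATION_BUTTON_TEXT,
        EVALUATION_QUEUE_TEXT,
        INTRODUCTION_TEXT,
        LLM_BENCHMARKS_TEXT,
        TITLE,
    )
    from src.display.css_html_js import custom_css
    from src.envs import REPO_ID
    from src.submission.submit import add_new_eval
except ImportError:
    # Option 2: placeholder values, mirroring the commented-out block in the commit
    print("Warning: Using placeholder values for src module imports.")
    CITATION_BUTTON_LABEL = "Citation"
    CITATION_BUTTON_TEXT = "Please cite us if you use this benchmark..."
    EVALUATION_QUEUE_TEXT = "Current evaluation queue:"
    INTRODUCTION_TEXT = "Welcome to the MLE-Dojo Benchmark Leaderboard."
    LLM_BENCHMARKS_TEXT = "Information about the benchmarks..."
    TITLE = "<h1>🏆 MLE-Dojo Benchmark Leaderboard</h1>"
    custom_css = ""
    REPO_ID = "your/space-id"  # Replace with the actual Space ID if needed

    def add_new_eval(*args):
        return "Submission placeholder."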
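
The example CSS rules stay as comments in the commit, and the Dataframe gets elem_id="leaderboard-table" as a styling hook. If the src.display.css_html_js module is not used, one option (an assumption, not something the commit does) is to collect those rules into a local custom_css string, optionally scoped to that elem_id, and pass it to gr.Blocks exactly as the new code already does. Whether these table selectors match Gradio's rendered Dataframe markup depends on the Gradio version.

import gradio as gr

# Sketch only: the commented-out example rules, gathered into one custom_css
# string and scoped to the Dataframe's elem_id (scoping is an assumption).
custom_css = """
#leaderboard-table table { width: 100%; border-collapse: collapse; }
#leaderboard-table th, #leaderboard-table td { padding: 8px 12px; border: 1px solid #ddd; text-align: left; white-space: normal; }
#leaderboard-table th { background-color: #f2f2f2; font-weight: bold; }
#leaderboard-table tr:nth-child(even) { background-color: #f9f9f9; }
#leaderboard-table tr:hover { background-color: #e9e9e9; }
#leaderboard-table td a { color: #007bff; text-decoration: none; }
#leaderboard-table td a:hover { text-decoration: underline; }
"""

demo = gr.Blocks(css=custom_css, theme=gr.themes.Soft())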
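
Because update_leaderboard now builds the Rank column and the HTML model links itself, it can be sanity-checked outside the UI by calling it for every category and confirming the ordering. A small sketch, assuming this file is saved as app.py and that CATEGORIES and update_leaderboard are importable from it as defined in the commit:

from app import CATEGORIES, update_leaderboard

for category in CATEGORIES:
    board = update_leaderboard(category)
    # Columns come back exactly as the gr.Dataframe headers/datatypes expect
    assert list(board.columns) == ["Rank", "Model", "organizer", "license", "Elo Score"]
    # Sorted by the selected category's Elo, best model first, Rank starting at 1
    assert board["Elo Score"].is_monotonic_decreasing
    assert board["Rank"].tolist() == list(range(1, len(board) + 1))
    print(f"{category}: top model -> {board.iloc[0]['Model']}")  # Model cell is an HTML link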
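
restart_space remains a stub and the APScheduler block stays commented out in this commit. If automatic restarts are wanted, one possible wiring is sketched below; it assumes huggingface_hub is installed, that REPO_ID comes from src.envs, and that a write token is available in an HF_TOKEN environment variable. None of this is part of the commit.

import os

from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi

from src.envs import REPO_ID  # assumed to hold the Space's repo id

def restart_space():
    # Restart the Space through the Hub API; requires a token with write access to REPO_ID.
    print(f"Attempting to restart space: {REPO_ID}")
    HfApi(token=os.environ.get("HF_TOKEN")).restart_space(repo_id=REPO_ID)

# Mirror the commented-out scheduler block: restart every 30 minutes.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()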