| """ | |
| Main entry point for the Model Capability Leaderboard application. | |
| """ | |
| import streamlit as st | |
| # Import configuration | |
| from src.utils.config import app_config, metrics_config | |
| # Import data functions | |
| from src.utils.data_loader import ( | |
| load_metric_data, | |
| process_data, | |
| filter_and_prepare_data, | |
| format_display_dataframe | |
| ) | |
| # Import styles | |
| from src.styles.base import load_all_styles | |
| # Import components | |
| from src.components.header import render_page_header | |
| from src.components.filters import ( | |
| initialize_session_state, | |
| render_metric_selection, | |
| render_task_selection | |
| ) | |
| from src.components.leaderboard import render_leaderboard_table, render_empty_state | |
| from src.components.tasks import render_task_descriptions | |


def setup_page():
    """
    Set up the Streamlit page configuration
    """
    st.set_page_config(
        page_title=app_config['title'],
        layout=app_config['layout'],
        initial_sidebar_state=app_config['initial_sidebar_state']
    )

    # Load all styles
    load_all_styles()
    # Force dark mode using custom CSS
    st.markdown("""
    <style>
    /* Force dark mode regardless of browser settings */
    .stApp {
        background-color: #1a202c !important;
        color: #e2e8f0 !important;
    }

    /* Override Streamlit's default styling to ensure dark mode */
    .stTextInput, .stSelectbox, .stMultiselect {
        background-color: #2d3748 !important;
        color: #e2e8f0 !important;
    }

    .stButton>button {
        background-color: #4a5568 !important;
        color: #e2e8f0 !important;
    }

    /* Override header and text colors */
    h1, h2, h3, h4, h5, h6, p, span, div {
        color: #e2e8f0 !important;
    }

    /* Ensure tab styling is consistent */
    .stTabs [data-baseweb="tab-list"] {
        background-color: #1a202c !important;
    }

    .stTabs [data-baseweb="tab"] {
        color: #e2e8f0 !important;
    }
    </style>
    """, unsafe_allow_html=True)


def main():
    """
    Main application function
    """
    # Set up page
    setup_page()

    # Render header
    render_page_header()

    # Load primary metric data (first metric in config)
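    # (Python dicts preserve insertion order, so index 0 is the first
    # metric defined in metrics_config)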
    primary_metric = list(metrics_config.keys())[0]
    metric_data = load_metric_data(metrics_config[primary_metric]["file"])
    df = process_data(metric_data)

    # Initialize session state
    initialize_session_state(df)

    # Create tabs
    tabs = st.tabs(["Leaderboard", "Benchmark Details"])
    # Tab 1: Leaderboard
    with tabs[0]:
        # Render filter components
        selected_metrics = render_metric_selection()

        # Continue with other filters
        selected_tasks = render_task_selection(df)

        # Render leaderboard only when both tasks and metrics are selected
        # (guards against an IndexError on selected_metrics[0] below)
        if selected_tasks and selected_metrics:
            # Load the primary metric data first (always the first in selected_metrics)
            primary_metric = selected_metrics[0]
            primary_metric_data = load_metric_data(metrics_config[primary_metric]["file"])
            primary_df = process_data(primary_metric_data)

            # Filter and prepare data for primary metric
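            # (st.session_state.selected_model_types is assumed to be seeded
            # by initialize_session_state and updated by the filter widgets)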
            filtered_df = filter_and_prepare_data(primary_df, selected_tasks, st.session_state.selected_model_types)

            # Format data for display
            display_df, metric_columns = format_display_dataframe(filtered_df, selected_tasks)
            # If additional metrics are selected, add their data too
            all_metric_columns = metric_columns.copy()

            for metric in selected_metrics[1:]:
                metric_info = metrics_config[metric]
                metric_data = load_metric_data(metric_info["file"])
                metric_df = process_data(metric_data)

                # Process and merge the additional metric data
                metric_filtered_df = filter_and_prepare_data(metric_df, selected_tasks, st.session_state.selected_model_types)
                metric_display_df, _ = format_display_dataframe(metric_filtered_df, selected_tasks)

                # Create a meaningful prefix for this metric
                if metric == "Absolute Improvement to Baseline":
                    prefix = "Abs"
                else:
                    # Use the first letter of each word in the metric name
                    prefix = "".join([word[0] for word in metric.split()]).upper()
                # Combine the dataframes - keep only metric columns from metric_display_df
                for col in metric_columns:
                    if col in metric_display_df.columns:
                        # Add columns with metric prefix
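                        # (pandas aligns this assignment on the index, so both
                        # frames are assumed to share the same index, as they
                        # come from the same filter/format pipeline)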
| display_df[f"{prefix}: {col}"] = metric_display_df[col] | |
| # Add to the list of all metric columns | |
| all_metric_columns.append(f"{prefix}: {col}") | |
| # Render the leaderboard table | |
| render_leaderboard_table(display_df, all_metric_columns, primary_metric) | |
        else:
            # Show empty state
            render_empty_state()

    # Tab 2: Benchmark Details
    with tabs[1]:
        # Render task descriptions
        render_task_descriptions()
    # Footer removed per user request


if __name__ == "__main__":
    main()