| """ | |
| Main entry point for the Model Capability Leaderboard application. | |
| """ | |
| import streamlit as st | |
| # Import configuration | |
| from src.utils.config import app_config, metrics_config | |
| # Import data functions | |
| from src.utils.data_loader import ( | |
| load_metric_data, | |
| process_data, | |
| filter_and_prepare_data, | |
| format_display_dataframe | |
| ) | |
| # Import styles | |
| from src.styles.base import load_all_styles | |
| # Import components | |
| from src.components.header import render_page_header | |
| from src.components.filters import ( | |
| initialize_session_state, | |
| render_metric_selection, | |
| render_task_selection | |
| ) | |
| from src.components.leaderboard import render_leaderboard_table, render_empty_state | |
| from src.components.tasks import render_task_descriptions | |


def setup_page():
    """
    Set up the Streamlit page configuration
    """
    st.set_page_config(
        page_title=app_config['title'],
        layout=app_config['layout'],
        initial_sidebar_state=app_config['initial_sidebar_state']
    )

    # Load all styles
    load_all_styles()
    # Force dark mode using custom CSS
    st.markdown("""
    <style>
    /* Force dark mode regardless of browser settings */
    .stApp {
        background-color: #1a202c !important;
        color: #e2e8f0 !important;
    }

    /* Override Streamlit's default styling to ensure dark mode */
    .stTextInput, .stSelectbox, .stMultiselect {
        background-color: #2d3748 !important;
        color: #e2e8f0 !important;
    }

    .stButton>button {
        background-color: #4a5568 !important;
        color: #e2e8f0 !important;
    }

    /* Override header and text colors */
    h1, h2, h3, h4, h5, h6, p, span, div {
        color: #e2e8f0 !important;
    }

    /* Ensure tab styling is consistent */
    .stTabs [data-baseweb="tab-list"] {
        background-color: #1a202c !important;
    }

    .stTabs [data-baseweb="tab"] {
        color: #e2e8f0 !important;
    }
    </style>
    """, unsafe_allow_html=True)


def main():
    """
    Main application function
    """
    # Set up page
    setup_page()

    # Render header
    render_page_header()

    # Load primary metric data (first metric in config)
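    # (Python dicts preserve insertion order, so index 0 is the first
    # metric defined in metrics_config)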
    primary_metric = list(metrics_config.keys())[0]
    metric_data = load_metric_data(metrics_config[primary_metric]["file"])
    df = process_data(metric_data)

    # Initialize session state
    initialize_session_state(df)

    # Create tabs
    tabs = st.tabs(["Leaderboard", "Benchmark Details"])
    # Tab 1: Leaderboard
    with tabs[0]:
        # Render filter components
        selected_metrics = render_metric_selection()

        # Continue with other filters
        selected_tasks = render_task_selection(df)

        # Render leaderboard only when both tasks and metrics are selected
        # (guards against an IndexError on selected_metrics[0] below)
        if selected_tasks and selected_metrics:
            # Load the primary metric data first (always the first in selected_metrics)
            primary_metric = selected_metrics[0]
            primary_metric_data = load_metric_data(metrics_config[primary_metric]["file"])
            primary_df = process_data(primary_metric_data)

            # Filter and prepare data for primary metric
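            # (st.session_state.selected_model_types is assumed to be seeded
            # by initialize_session_state and updated by the filter widgets)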
            filtered_df = filter_and_prepare_data(primary_df, selected_tasks, st.session_state.selected_model_types)

            # Format data for display
            display_df, metric_columns = format_display_dataframe(filtered_df, selected_tasks)
            # If additional metrics are selected, add their data too
            all_metric_columns = metric_columns.copy()

            for metric in selected_metrics[1:]:
                metric_info = metrics_config[metric]
                metric_data = load_metric_data(metric_info["file"])
                metric_df = process_data(metric_data)

                # Process and merge the additional metric data
                metric_filtered_df = filter_and_prepare_data(metric_df, selected_tasks, st.session_state.selected_model_types)
                metric_display_df, _ = format_display_dataframe(metric_filtered_df, selected_tasks)

                # Create a meaningful prefix for this metric
                if metric == "Absolute Improvement to Baseline":
                    prefix = "Abs"
                else:
                    # Use the first letter of each word in the metric name
                    prefix = "".join([word[0] for word in metric.split()]).upper()
                # Combine the dataframes - keep only metric columns from metric_display_df
                for col in metric_columns:
                    if col in metric_display_df.columns:
                        # Add columns with metric prefix
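                        # (pandas aligns this assignment on the index, so both
                        # frames are assumed to share the same index, as they
                        # come from the same filter/format pipeline)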
| display_df[f"{prefix}: {col}"] = metric_display_df[col] | |
| # Add to the list of all metric columns | |
| all_metric_columns.append(f"{prefix}: {col}") | |
| # Render the leaderboard table | |
| render_leaderboard_table(display_df, all_metric_columns, primary_metric) | |
        else:
            # Show empty state
            render_empty_state()

    # Tab 2: Benchmark Details
    with tabs[1]:
        # Render task descriptions
        render_task_descriptions()
    # Footer removed per user request


if __name__ == "__main__":
    main()