Spaces:

togethercomputer
/

FutureBench

Running

App Files Files Community

FutureBench / app.py

vinid

Leaderboard deployment 2025-07-16 18:05:41

6441bc6 12 days ago

raw

history blame contribute delete

9.76 kB

	import os

	import gradio as gr
	import pandas as pd
	from apscheduler.schedulers.background import BackgroundScheduler
	from gradio_rangeslider import RangeSlider
	from huggingface_hub import snapshot_download

	# Import our data processing utilities
	from process_data import API, DATA_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, PREDICTIONS_CSV_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN

	# Import our leaderboard components
	from src.about import ABOUT_TEXT, INTRODUCTION_TEXT, TITLE
	from src.display_utils import CUSTOM_CSS, get_display_columns
	from src.leaderboard_utils import create_leaderboard_df, get_available_weeks, get_leaderboard_summary

	# Module-level data cache. These placeholders are filled in by
	# download_and_process_data() and read by the display helpers below.
	DATA_SUMMARY = {}  # summary statistics, or an {"error": ...} marker
	AVAILABLE_WEEKS = []  # value returned by get_available_weeks()
	PREDICTION_DATES = []  # sorted unique `open_to_bet_until` dates
	LEADERBOARD_DF = None  # frame returned by create_leaderboard_df()
	PREDICTIONS_DF = None  # frame read from data.csv


	def restart_space():
	    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
	    API.restart_space(repo_id=REPO_ID)


	def _download_dataset(label, repo_id, local_dir):
	    """Snapshot one Hub dataset repo into ``local_dir``, logging instead of raising.

	    ``label`` is the lower-case human-readable name used in the log lines
	    (for example ``"eval requests"``).
	    """
	    try:
	        print(f"Downloading {label} to {local_dir}")
	        snapshot_download(
	            repo_id=repo_id,
	            local_dir=local_dir,
	            repo_type="dataset",
	            tqdm_class=None,
	            etag_timeout=30,
	            token=TOKEN,
	        )
	        print(f"✓ {label.capitalize()} downloaded successfully")
	    except Exception as e:
	        # Deliberately best-effort: a failed download is reported and the
	        # caller carries on with whatever is already on disk.
	        print(f"Error downloading {label}: {e}")


	def download_and_process_data():
	    """Download the dataset repos and rebuild the module-level data cache.

	    The queue, results and prediction repos are downloaded in that order;
	    a failure in any of them is printed and otherwise ignored. ``data.csv``
	    inside ``PREDICTIONS_CSV_PATH`` is then loaded and used to rebind
	    ``PREDICTIONS_DF``, ``LEADERBOARD_DF``, ``PREDICTION_DATES``,
	    ``AVAILABLE_WEEKS`` and ``DATA_SUMMARY``.

	    When ``data.csv`` does not exist, the frames are reset to empty frames,
	    the date and week lists are cleared, and ``DATA_SUMMARY`` becomes
	    ``{"error": "No data found"}``. Errors raised while reading or
	    processing the CSV are not caught here.
	    """
	    global PREDICTIONS_DF, LEADERBOARD_DF, PREDICTION_DATES, AVAILABLE_WEEKS, DATA_SUMMARY

	    print("=== Starting Data Download ===")

	    # (log label, repo id, local directory), in download order.
	    for label, repo_id, local_dir in (
	        ("eval requests", QUEUE_REPO, EVAL_REQUESTS_PATH),
	        ("eval results", RESULTS_REPO, EVAL_RESULTS_PATH),
	        ("prediction data", DATA_REPO, PREDICTIONS_CSV_PATH),
	    ):
	        _download_dataset(label, repo_id, local_dir)

	    print("=== Processing Data ===")

	    csv_path = os.path.join(PREDICTIONS_CSV_PATH, "data.csv")
	    if not os.path.exists(csv_path):
	        print(f"❌ Error: data.csv not found at {csv_path}")
	        PREDICTIONS_DF = pd.DataFrame()
	        LEADERBOARD_DF = pd.DataFrame()
	        # Clear the derived lists too, so a refresh that loses the CSV does
	        # not leave dates/weeks from an earlier successful load behind.
	        PREDICTION_DATES = []
	        AVAILABLE_WEEKS = []
	        DATA_SUMMARY = {"error": "No data found"}
	        return

	    print(f"Loading data from {csv_path}")
	    PREDICTIONS_DF = pd.read_csv(csv_path)

	    # Parse the two timestamp columns.
	    for column in ("open_to_bet_until", "prediction_created_at"):
	        PREDICTIONS_DF[column] = pd.to_datetime(PREDICTIONS_DF[column])

	    # Unique calendar dates, ascending. Missing timestamps (NaT) are
	    # excluded so that only real dates are sorted and offered for selection.
	    PREDICTION_DATES = sorted(PREDICTIONS_DF["open_to_bet_until"].dropna().dt.date.unique())

	    # Weeks available for filtering.
	    AVAILABLE_WEEKS = get_available_weeks(PREDICTIONS_DF)

	    print("Creating leaderboard...")
	    LEADERBOARD_DF = create_leaderboard_df(PREDICTIONS_DF)

	    leaderboard_summary = get_leaderboard_summary(LEADERBOARD_DF)
	    DATA_SUMMARY = {
	        "total_records": len(PREDICTIONS_DF),
	        "unique_events": PREDICTIONS_DF["event_id"].nunique(),
	        "unique_algorithms": PREDICTIONS_DF["algorithm_name"].nunique(),
	        "unique_event_types": PREDICTIONS_DF["event_type"].nunique(),
	        "date_range": f"{PREDICTION_DATES[0]} to {PREDICTION_DATES[-1]}" if PREDICTION_DATES else "N/A",
	        "algorithms": PREDICTIONS_DF["algorithm_name"].unique().tolist(),
	        "event_types": PREDICTIONS_DF["event_type"].unique().tolist(),
	        "leaderboard_summary": leaderboard_summary,
	    }

	    print("✓ Data processed successfully")
	    print(f" - Total records: {DATA_SUMMARY['total_records']}")
	    print(f" - Unique events: {DATA_SUMMARY['unique_events']}")
	    print(f" - Unique algorithms: {DATA_SUMMARY['unique_algorithms']}")
	    print(f" - Leaderboard models: {leaderboard_summary['total_models']}")
	    print(f" - Date range: {DATA_SUMMARY['date_range']}")


	def get_leaderboard(date_range=None):
	    """Build the leaderboard table for a span of prediction dates.

	    ``date_range`` is a ``(start, end)`` pair of positions into
	    ``PREDICTION_DATES``; ``None`` selects every date. Both positions are
	    clamped into the valid index range (the end never precedes the start)
	    and then truncated to integers. A one-column ``message`` frame is
	    returned whenever there is nothing to show.
	    """
	    no_predictions = PREDICTIONS_DF is None or PREDICTIONS_DF.empty
	    if no_predictions:
	        return pd.DataFrame({"message": ["No data available"]})

	    if not PREDICTION_DATES:
	        return pd.DataFrame({"message": ["No dates available"]})

	    # Resolve the requested positions; the default spans the full list.
	    last_pos = len(PREDICTION_DATES) - 1
	    lo, hi = (0, last_pos) if date_range is None else date_range
	    lo = max(0, min(lo, last_pos))
	    hi = max(lo, min(hi, last_pos))
	    lo = int(lo)
	    hi = int(hi)

	    selected_span = (PREDICTION_DATES[lo], PREDICTION_DATES[hi])
	    board = create_leaderboard_df(PREDICTIONS_DF, selected_span)

	    if board.empty:
	        return pd.DataFrame({"message": ["No data available for selected week"]})

	    # Keep only the display columns that the leaderboard actually has.
	    present = board.columns
	    shown = [name for name in get_display_columns() if name in present]
	    return board[shown]


	def get_data_summary():
	    """Render ``DATA_SUMMARY`` as the markdown text shown on the Summary tab.

	    Returns ``"No data loaded"`` when the summary is empty and
	    ``"Error: <message>"`` when it carries an ``"error"`` key. Otherwise the
	    leaderboard figures and dataset overview are formatted as markdown.
	    Algorithm and event-type entries are converted with ``str()`` before
	    joining, so non-string values (such as missing values from the CSV) are
	    rendered instead of raising ``TypeError``.
	    """
	    if not DATA_SUMMARY:
	        return "No data loaded"

	    if "error" in DATA_SUMMARY:
	        return f"Error: {DATA_SUMMARY['error']}"

	    board = DATA_SUMMARY.get("leaderboard_summary", {})
	    model_names = ", ".join(str(name) for name in DATA_SUMMARY["algorithms"])
	    event_type_names = ", ".join(str(name) for name in DATA_SUMMARY["event_types"])

	    # Assembled line by line so the markdown does not depend on how this
	    # source file is indented; the text starts and ends with a newline.
	    lines = [
	        "",
	        "# 🏆 Leaderboard Summary",
	        "",
	        f'- Models Ranked: {board.get("total_models", 0)}',
	        f'- Total Predictions: {board.get("total_predictions", 0):,}',
	        f'- Average Accuracy: {board.get("avg_accuracy", 0):.1f}%',
	        "",
	        "# 📊 Dataset Overview",
	        "",
	        f'- Total Records: {DATA_SUMMARY["total_records"]:,}',
	        f'- Unique Events: {DATA_SUMMARY["unique_events"]:,}',
	        f'- Event Types: {DATA_SUMMARY["unique_event_types"]}',
	        f'- Date Range: {DATA_SUMMARY["date_range"]}',
	        "",
	        "## 🤖 Models",
	        model_names,
	        "",
	        "## 📋 Event Types",
	        event_type_names,
	        "",
	    ]
	    return "\n".join(lines)


	def get_sample_data():
	    """Return the first ten prediction rows, restricted to the key columns.

	    Key columns absent from ``PREDICTIONS_DF`` are skipped. A one-column
	    ``message`` frame is returned when no predictions are loaded.
	    """
	    frame = PREDICTIONS_DF
	    if frame is None or frame.empty:
	        return pd.DataFrame({"message": ["No data available"]})

	    # Columns worth previewing, in display order.
	    preview_columns = (
	        "event_id",
	        "question",
	        "event_type",
	        "algorithm_name",
	        "actual_prediction",
	        "result",
	        "open_to_bet_until",
	    )
	    present = [name for name in preview_columns if name in frame.columns]

	    preview = frame[present]
	    return preview.head(10)


	def refresh_all_data(date_range=None):
	    """Re-download and reprocess the data, then rebuild every display value.

	    Returns a ``(leaderboard, summary, sample)`` tuple; ``date_range`` is
	    forwarded to ``get_leaderboard``.
	    """
	    download_and_process_data()

	    leaderboard = get_leaderboard(date_range)
	    summary = get_data_summary()
	    sample = get_sample_data()
	    return (leaderboard, summary, sample)


	# Load the data once at import time so the widgets below have initial values.
	download_and_process_data()

	# Gradio interface: a header followed by four tabs.
	# NOTE(review): the nesting was reconstructed from a copy of the file whose
	# indentation was lost. In particular, confirm whether the introduction
	# markdown belongs inside the logo row or below it.
	with gr.Blocks(css=CUSTOM_CSS, title="FutureBench Leaderboard") as demo:
	    gr.HTML(TITLE)
	    with gr.Row():
	        gr.Image(
	            "image/image.png",
	            height=200,
	            width=200,
	            show_label=False,
	            show_download_button=False,
	            show_fullscreen_button=False,
	            container=False,
	            elem_classes="center-logo",
	        )
	    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

	    with gr.Tabs():
	        with gr.TabItem("🏆 Leaderboard"):
	            leaderboard_display = gr.Dataframe(value=get_leaderboard(), interactive=False, wrap=True, elem_id="leaderboard-table")

	            with gr.Row():
	                # The slider selects positions into PREDICTION_DATES. The
	                # upper bound is clamped to 0 so that an empty date list
	                # does not produce maximum=-1 (below the minimum).
	                date_slider = RangeSlider(
	                    minimum=0,
	                    maximum=max(len(PREDICTION_DATES) - 1, 0),
	                    value=(0, max(len(PREDICTION_DATES) - 1, 0)),
	                    step=1,
	                    label="📅 Date Range",
	                    show_label=True,
	                    labels=[str(d) for d in PREDICTION_DATES],
	                )

	            # Rebuild the leaderboard whenever the selected range changes.
	            date_slider.change(get_leaderboard, inputs=date_slider, outputs=leaderboard_display)

	        with gr.TabItem("📊 Summary"):
	            summary_display = gr.Markdown(get_data_summary(), elem_classes="markdown-text")
	            refresh_summary_btn = gr.Button("🔄 Refresh Summary")

	            # Re-renders from the cached DATA_SUMMARY; nothing is downloaded.
	            refresh_summary_btn.click(get_data_summary, outputs=summary_display)

	        with gr.TabItem("🔍 Sample Data"):
	            sample_display = gr.Dataframe(value=get_sample_data(), interactive=False, wrap=True)
	            refresh_sample_btn = gr.Button("🔄 Refresh Sample")

	            # Re-reads the cached PREDICTIONS_DF; nothing is downloaded.
	            refresh_sample_btn.click(get_sample_data, outputs=sample_display)

	        with gr.TabItem("📋 About"):
	            gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")

	if __name__ == "__main__":
	    # Schedule restart_space() on a fixed 1800-second interval, then start
	    # the scheduler before the app is launched.
	    scheduler = BackgroundScheduler()
	    scheduler.add_job(restart_space, trigger="interval", seconds=1800)
	    scheduler.start()

	    # Enable the request queue (default concurrency limit of 40) and launch.
	    demo.queue(default_concurrency_limit=40).launch()