# FutureBench / app.py
# Source page header: vinid, "Leaderboard deployment 2025-07-16 18:05:41", commit 6441bc6
import os
import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_rangeslider import RangeSlider
from huggingface_hub import snapshot_download
# Import our data processing utilities
from process_data import API, DATA_REPO, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, PREDICTIONS_CSV_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
# Import our leaderboard components
from src.about import ABOUT_TEXT, INTRODUCTION_TEXT, TITLE
from src.display_utils import CUSTOM_CSS, get_display_columns
from src.leaderboard_utils import create_leaderboard_df, get_available_weeks, get_leaderboard_summary
# Global variables for data
# All of these are (re)assigned by download_and_process_data(); the values
# below are only the "nothing loaded yet" placeholders.
# Raw predictions table read from data.csv (an empty DataFrame if the file is missing).
PREDICTIONS_DF = None
# Leaderboard built by create_leaderboard_df() from the full predictions table.
LEADERBOARD_DF = None
# Sorted unique calendar dates taken from the "open_to_bet_until" column;
# the date-range slider indexes into this list.
PREDICTION_DATES = []
# Whatever get_available_weeks() returns for the predictions table.
AVAILABLE_WEEKS = []
# Summary statistics dict, or {"error": ...} when data.csv could not be found.
DATA_SUMMARY = {}
def restart_space():
    """Restart the Space identified by ``REPO_ID``.

    Thin wrapper around ``API.restart_space`` so the call can be handed to a
    scheduler as a zero-argument job.
    """
    API.restart_space(repo_id=REPO_ID)
def _download_dataset_snapshot(repo_id, local_dir, label):
    """Download one dataset repo snapshot into *local_dir* and log the outcome.

    Args:
        repo_id: Dataset repository to fetch.
        local_dir: Directory the snapshot is written to.
        label: Lower-case, human-readable name used in the log messages.

    Returns:
        True if the download succeeded, False if it raised.
    """
    print(f"Downloading {label} to {local_dir}")
    try:
        snapshot_download(
            repo_id=repo_id,
            local_dir=local_dir,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=TOKEN,
        )
    except Exception as e:
        # Deliberately best-effort: a failed download must not stop startup,
        # processing continues with whatever is already on disk.
        print(f"Error downloading {label}: {e}")
        return False
    print(f"✓ {label.capitalize()} downloaded successfully")
    return True


def download_and_process_data():
    """Download the dataset snapshots and rebuild the module-level data state.

    Fetches the eval-request, eval-result and prediction dataset repos (each
    download is best-effort), then reads ``data.csv`` from
    ``PREDICTIONS_CSV_PATH`` and recomputes ``PREDICTIONS_DF``,
    ``LEADERBOARD_DF``, ``PREDICTION_DATES``, ``AVAILABLE_WEEKS`` and
    ``DATA_SUMMARY``.

    If ``data.csv`` does not exist, the globals are reset to empty values and
    ``DATA_SUMMARY`` is set to ``{"error": "No data found"}``.
    """
    global PREDICTIONS_DF, LEADERBOARD_DF, PREDICTION_DATES, AVAILABLE_WEEKS, DATA_SUMMARY

    print("=== Starting Data Download ===")
    snapshots = (
        (QUEUE_REPO, EVAL_REQUESTS_PATH, "eval requests"),
        (RESULTS_REPO, EVAL_RESULTS_PATH, "eval results"),
        (DATA_REPO, PREDICTIONS_CSV_PATH, "prediction data"),
    )
    for repo_id, local_dir, label in snapshots:
        _download_dataset_snapshot(repo_id, local_dir, label)

    print("=== Processing Data ===")
    csv_path = os.path.join(PREDICTIONS_CSV_PATH, "data.csv")
    if not os.path.exists(csv_path):
        print(f"❌ Error: data.csv not found at {csv_path}")
        PREDICTIONS_DF = pd.DataFrame()
        LEADERBOARD_DF = pd.DataFrame()
        # Also clear the derived lists so a refresh cannot leave dates/weeks
        # from a previous successful load behind.
        PREDICTION_DATES = []
        AVAILABLE_WEEKS = []
        DATA_SUMMARY = {"error": "No data found"}
        return

    # Build everything in locals first and publish the globals together at
    # the end, so an exception part-way through does not leave the module
    # with a mix of old and new state.
    print(f"Loading data from {csv_path}")
    predictions = pd.read_csv(csv_path)
    predictions["open_to_bet_until"] = pd.to_datetime(predictions["open_to_bet_until"])
    predictions["prediction_created_at"] = pd.to_datetime(predictions["prediction_created_at"])

    # Missing dates (NaT) cannot be ordered against real dates, so drop them
    # before sorting.
    prediction_dates = sorted(predictions["open_to_bet_until"].dropna().dt.date.unique())
    available_weeks = get_available_weeks(predictions)

    print("Creating leaderboard...")
    leaderboard = create_leaderboard_df(predictions)
    leaderboard_summary = get_leaderboard_summary(leaderboard)

    if prediction_dates:
        date_range = f"{prediction_dates[0]} to {prediction_dates[-1]}"
    else:
        date_range = "N/A"
    data_summary = {
        "total_records": len(predictions),
        "unique_events": predictions["event_id"].nunique(),
        "unique_algorithms": predictions["algorithm_name"].nunique(),
        "unique_event_types": predictions["event_type"].nunique(),
        "date_range": date_range,
        "algorithms": predictions["algorithm_name"].unique().tolist(),
        "event_types": predictions["event_type"].unique().tolist(),
        "leaderboard_summary": leaderboard_summary,
    }

    PREDICTIONS_DF = predictions
    LEADERBOARD_DF = leaderboard
    PREDICTION_DATES = prediction_dates
    AVAILABLE_WEEKS = available_weeks
    DATA_SUMMARY = data_summary

    print("✓ Data processed successfully")
    print(f" - Total records: {DATA_SUMMARY['total_records']}")
    print(f" - Unique events: {DATA_SUMMARY['unique_events']}")
    print(f" - Unique algorithms: {DATA_SUMMARY['unique_algorithms']}")
    print(f" - Leaderboard models: {leaderboard_summary['total_models']}")
    print(f" - Date range: {DATA_SUMMARY['date_range']}")
def get_leaderboard(date_range=None):
    """Build the leaderboard table for a span of prediction dates.

    Args:
        date_range: Optional ``(start, end)`` pair of positions into
            ``PREDICTION_DATES``. Out-of-range positions are clamped and the
            end is never allowed to fall before the start. ``None`` selects
            every available date.

    Returns:
        A DataFrame restricted to the columns from ``get_display_columns()``
        that the leaderboard actually has, or a one-row ``message`` DataFrame
        when there is nothing to show.
    """
    if PREDICTIONS_DF is None or PREDICTIONS_DF.empty:
        return pd.DataFrame({"message": ["No data available"]})
    if not PREDICTION_DATES:
        return pd.DataFrame({"message": ["No dates available"]})

    last_idx = len(PREDICTION_DATES) - 1
    if date_range is None:
        lo, hi = 0, last_idx
    else:
        lo, hi = date_range
        lo = max(0, min(lo, last_idx))
        hi = max(lo, min(hi, last_idx))
        # The slider may hand over non-integer numbers; list indices must be ints.
        lo, hi = int(lo), int(hi)

    selected_span = (PREDICTION_DATES[lo], PREDICTION_DATES[hi])
    board = create_leaderboard_df(PREDICTIONS_DF, selected_span)
    if board.empty:
        return pd.DataFrame({"message": ["No data available for selected week"]})

    # Keep the configured display order, skipping columns the board lacks.
    shown = [name for name in get_display_columns() if name in board.columns]
    return board[shown]
def get_data_summary():
    """Render ``DATA_SUMMARY`` as a Markdown report.

    Returns:
        ``"No data loaded"`` when ``DATA_SUMMARY`` is empty, an
        ``"Error: ..."`` line when it carries an ``"error"`` key, and otherwise
        a Markdown string with the leaderboard summary, dataset overview,
        model names and event types.
    """
    if not DATA_SUMMARY:
        return "No data loaded"
    if "error" in DATA_SUMMARY:
        return f"Error: {DATA_SUMMARY['error']}"

    summary = DATA_SUMMARY.get("leaderboard_summary", {})
    # The name lists come straight from the data and may hold non-string
    # entries (e.g. NaN for a missing value); str.join would raise on those.
    algorithms = ", ".join(map(str, DATA_SUMMARY["algorithms"]))
    event_types = ", ".join(map(str, DATA_SUMMARY["event_types"]))

    summary_text = f"""
# 🏆 Leaderboard Summary
- **Models Ranked**: {summary.get("total_models", 0)}
- **Total Predictions**: {summary.get("total_predictions", 0):,}
- **Average Accuracy**: {summary.get("avg_accuracy", 0):.1f}%
# 📊 Dataset Overview
- **Total Records**: {DATA_SUMMARY["total_records"]:,}
- **Unique Events**: {DATA_SUMMARY["unique_events"]:,}
- **Event Types**: {DATA_SUMMARY["unique_event_types"]}
- **Date Range**: {DATA_SUMMARY["date_range"]}
## 🤖 Models
{algorithms}
## 📋 Event Types
{event_types}
"""
    return summary_text
def get_sample_data():
    """Return a small preview of the loaded predictions.

    Returns:
        The first 10 rows of ``PREDICTIONS_DF`` limited to the key columns
        that are present, or a one-row ``message`` DataFrame when no
        predictions are loaded.
    """
    frame = PREDICTIONS_DF
    if frame is None or frame.empty:
        return pd.DataFrame({"message": ["No data available"]})

    # Preferred preview columns, in display order; absent ones are skipped.
    wanted = (
        "event_id",
        "question",
        "event_type",
        "algorithm_name",
        "actual_prediction",
        "result",
        "open_to_bet_until",
    )
    present = [name for name in wanted if name in frame.columns]
    preview = frame[present]
    return preview.head(10)
def refresh_all_data(date_range=None):
    """Re-download the data, then rebuild every displayed component.

    Args:
        date_range: Forwarded unchanged to ``get_leaderboard``.

    Returns:
        A ``(leaderboard, summary, sample)`` tuple produced by
        ``get_leaderboard``, ``get_data_summary`` and ``get_sample_data``.
    """
    # Must run first: the three getters below read the globals it rebuilds.
    download_and_process_data()

    leaderboard = get_leaderboard(date_range)
    summary = get_data_summary()
    sample = get_sample_data()
    return leaderboard, summary, sample
# Download and process data on startup so the components below have
# initial values to render.
download_and_process_data()

# Create Gradio interface
with gr.Blocks(css=CUSTOM_CSS, title="FutureBench Leaderboard") as demo:
    gr.HTML(TITLE)
    with gr.Row():
        gr.Image(
            "image/image.png",
            height=200,
            width=200,
            show_label=False,
            show_download_button=False,
            show_fullscreen_button=False,
            container=False,
            elem_classes="center-logo",
        )
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs():
        with gr.TabItem("🏆 Leaderboard"):
            leaderboard_display = gr.Dataframe(
                value=get_leaderboard(),
                interactive=False,
                wrap=True,
                elem_id="leaderboard-table",
            )
            # With no dates loaded, len(...) - 1 would be -1 and give the
            # slider a maximum below its minimum; clamp the upper bound to 0.
            last_date_idx = max(len(PREDICTION_DATES) - 1, 0)
            with gr.Row():
                date_slider = RangeSlider(
                    minimum=0,
                    maximum=last_date_idx,
                    value=(0, last_date_idx),
                    step=1,
                    label="📅 Date Range",
                    show_label=True,
                    labels=[str(d) for d in PREDICTION_DATES],
                )
            # Update leaderboard when date range is changed
            date_slider.change(get_leaderboard, inputs=date_slider, outputs=leaderboard_display)

        with gr.TabItem("📊 Summary"):
            summary_display = gr.Markdown(get_data_summary(), elem_classes="markdown-text")
            refresh_summary_btn = gr.Button("🔄 Refresh Summary")
            # Re-renders from the data already in memory; it does not re-download.
            refresh_summary_btn.click(get_data_summary, outputs=summary_display)

        with gr.TabItem("🔍 Sample Data"):
            sample_display = gr.Dataframe(value=get_sample_data(), interactive=False, wrap=True)
            refresh_sample_btn = gr.Button("🔄 Refresh Sample")
            # Re-renders from the data already in memory; it does not re-download.
            refresh_sample_btn.click(get_sample_data, outputs=sample_display)

        with gr.TabItem("📋 About"):
            gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
if __name__ == "__main__":
    # Restart the Space on a fixed 1800-second (30-minute) interval from a
    # background job, then serve the UI.
    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, trigger="interval", seconds=1800)
    scheduler.start()

    queued_demo = demo.queue(default_concurrency_limit=40)
    queued_demo.launch()