"""FutureBench leaderboard Gradio app.

Downloads the prediction datasets from the Hugging Face Hub on startup,
builds an in-memory leaderboard, and serves it behind a Gradio Blocks UI
with date-range filtering, a summary tab, and a sample-data tab.
"""

import os

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_rangeslider import RangeSlider
from huggingface_hub import snapshot_download

# Import our data processing utilities
from process_data import (
    API,
    DATA_REPO,
    EVAL_REQUESTS_PATH,
    EVAL_RESULTS_PATH,
    PREDICTIONS_CSV_PATH,
    QUEUE_REPO,
    REPO_ID,
    RESULTS_REPO,
    TOKEN,
)

# Import our leaderboard components
from src.about import ABOUT_TEXT, INTRODUCTION_TEXT, TITLE
from src.display_utils import CUSTOM_CSS, get_display_columns
from src.leaderboard_utils import create_leaderboard_df, get_available_weeks, get_leaderboard_summary

# Global variables for data
PREDICTIONS_DF = None   # raw predictions DataFrame loaded from data.csv
LEADERBOARD_DF = None   # aggregated leaderboard built from PREDICTIONS_DF
PREDICTION_DATES = []   # sorted unique "open_to_bet_until" dates
AVAILABLE_WEEKS = []    # week buckets available for filtering
DATA_SUMMARY = {}       # cached summary stats shown in the Summary tab


def restart_space():
    """Restart the space if needed (invoked by the background scheduler)."""
    API.restart_space(repo_id=REPO_ID)


def _download_dataset(repo_id, local_dir, label):
    """Best-effort snapshot download of one dataset repo.

    Failures are logged and swallowed so that a single unreachable repo does
    not abort the whole startup sequence; downstream code handles missing data.
    """
    try:
        print(f"Downloading {label} to {local_dir}")
        snapshot_download(
            repo_id=repo_id,
            local_dir=local_dir,
            repo_type="dataset",
            tqdm_class=None,
            etag_timeout=30,
            token=TOKEN,
        )
        # .capitalize() reproduces the original per-repo success messages
        # ("✓ Eval requests ...", "✓ Prediction data ...") from the label.
        print(f"✓ {label.capitalize()} downloaded successfully")
    except Exception as e:
        print(f"Error downloading {label}: {e}")


def download_and_process_data():
    """Download all datasets and rebuild the in-memory leaderboard state.

    Populates the module-level PREDICTIONS_DF, LEADERBOARD_DF,
    PREDICTION_DATES, AVAILABLE_WEEKS, and DATA_SUMMARY globals. On failure
    (data.csv missing) every global is reset to an empty value so a failed
    refresh never leaves stale data from a previous run.
    """
    global PREDICTIONS_DF, LEADERBOARD_DF, PREDICTION_DATES, AVAILABLE_WEEKS, DATA_SUMMARY

    print("=== Starting Data Download ===")
    _download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH, "eval requests")
    _download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH, "eval results")
    _download_dataset(DATA_REPO, PREDICTIONS_CSV_PATH, "prediction data")

    # Process the data
    print("=== Processing Data ===")
    csv_path = os.path.join(PREDICTIONS_CSV_PATH, "data.csv")
    if not os.path.exists(csv_path):
        print(f"❌ Error: data.csv not found at {csv_path}")
        PREDICTIONS_DF = pd.DataFrame()
        LEADERBOARD_DF = pd.DataFrame()
        # Also clear the date/week caches so stale values from a previous
        # successful refresh cannot leak into the UI.
        PREDICTION_DATES = []
        AVAILABLE_WEEKS = []
        DATA_SUMMARY = {"error": "No data found"}
        return

    print(f"Loading data from {csv_path}")
    PREDICTIONS_DF = pd.read_csv(csv_path)

    # Convert date columns
    PREDICTIONS_DF["open_to_bet_until"] = pd.to_datetime(PREDICTIONS_DF["open_to_bet_until"])
    PREDICTIONS_DF["prediction_created_at"] = pd.to_datetime(PREDICTIONS_DF["prediction_created_at"])

    # Get prediction dates and available weeks for filtering
    PREDICTION_DATES = sorted(PREDICTIONS_DF["open_to_bet_until"].dt.date.unique())
    AVAILABLE_WEEKS = get_available_weeks(PREDICTIONS_DF)

    # Create leaderboard
    print("Creating leaderboard...")
    LEADERBOARD_DF = create_leaderboard_df(PREDICTIONS_DF)

    # Create data summary
    leaderboard_summary = get_leaderboard_summary(LEADERBOARD_DF)
    DATA_SUMMARY = {
        "total_records": len(PREDICTIONS_DF),
        "unique_events": PREDICTIONS_DF["event_id"].nunique(),
        "unique_algorithms": PREDICTIONS_DF["algorithm_name"].nunique(),
        "unique_event_types": PREDICTIONS_DF["event_type"].nunique(),
        "date_range": f"{PREDICTION_DATES[0]} to {PREDICTION_DATES[-1]}" if PREDICTION_DATES else "N/A",
        "algorithms": PREDICTIONS_DF["algorithm_name"].unique().tolist(),
        "event_types": PREDICTIONS_DF["event_type"].unique().tolist(),
        "leaderboard_summary": leaderboard_summary,
    }

    print("✓ Data processed successfully")
    print(f"  - Total records: {DATA_SUMMARY['total_records']}")
    print(f"  - Unique events: {DATA_SUMMARY['unique_events']}")
    print(f"  - Unique algorithms: {DATA_SUMMARY['unique_algorithms']}")
    print(f"  - Leaderboard models: {leaderboard_summary['total_models']}")
    print(f"  - Date range: {DATA_SUMMARY['date_range']}")


def get_leaderboard(date_range=None):
    """Return leaderboard filtered by date range.

    date_range is an (start_idx, end_idx) pair of indices into
    PREDICTION_DATES (as produced by the RangeSlider); None means the full
    range. Indices are clamped to valid bounds and coerced to int because the
    slider may deliver floats.
    """
    if PREDICTIONS_DF is None or PREDICTIONS_DF.empty:
        return pd.DataFrame({"message": ["No data available"]})

    # Determine range of dates to filter by
    if not PREDICTION_DATES:
        return pd.DataFrame({"message": ["No dates available"]})

    if date_range is None:
        start_idx, end_idx = 0, len(PREDICTION_DATES) - 1
    else:
        start_idx, end_idx = date_range
        start_idx = max(0, min(start_idx, len(PREDICTION_DATES) - 1))
        end_idx = max(start_idx, min(end_idx, len(PREDICTION_DATES) - 1))
        start_idx, end_idx = int(start_idx), int(end_idx)

    week_range = (PREDICTION_DATES[start_idx], PREDICTION_DATES[end_idx])

    # Create filtered leaderboard
    filtered_leaderboard = create_leaderboard_df(PREDICTIONS_DF, week_range)
    if filtered_leaderboard.empty:
        return pd.DataFrame({"message": ["No data available for selected week"]})

    # Return only display columns
    display_cols = get_display_columns()
    available_cols = [col for col in display_cols if col in filtered_leaderboard.columns]
    return filtered_leaderboard[available_cols]


def get_data_summary():
    """Return formatted data summary as Markdown for the Summary tab."""
    if not DATA_SUMMARY:
        return "No data loaded"
    if "error" in DATA_SUMMARY:
        return f"Error: {DATA_SUMMARY['error']}"

    summary = DATA_SUMMARY.get("leaderboard_summary", {})
    summary_text = f"""
# 🏆 Leaderboard Summary
- **Models Ranked**: {summary.get("total_models", 0)}
- **Total Predictions**: {summary.get("total_predictions", 0):,}
- **Average Accuracy**: {summary.get("avg_accuracy", 0):.1f}%

# 📊 Dataset Overview
- **Total Records**: {DATA_SUMMARY["total_records"]:,}
- **Unique Events**: {DATA_SUMMARY["unique_events"]:,}
- **Event Types**: {DATA_SUMMARY["unique_event_types"]}
- **Date Range**: {DATA_SUMMARY["date_range"]}

## 🤖 Models
{", ".join(DATA_SUMMARY["algorithms"])}

## 📋 Event Types
{", ".join(DATA_SUMMARY["event_types"])}
"""
    return summary_text


def get_sample_data():
    """Return sample of the data (first 10 rows, key columns only)."""
    if PREDICTIONS_DF is None or PREDICTIONS_DF.empty:
        return pd.DataFrame({"message": ["No data available"]})

    # Return first 10 rows with key columns
    sample_cols = ["event_id", "question", "event_type", "algorithm_name", "actual_prediction", "result", "open_to_bet_until"]
    available_cols = [col for col in sample_cols if col in PREDICTIONS_DF.columns]
    return PREDICTIONS_DF[available_cols].head(10)


def refresh_all_data(date_range=None):
    """Refresh all data and return updated components."""
    download_and_process_data()
    return (
        get_leaderboard(date_range),
        get_data_summary(),
        get_sample_data(),
    )


# Download and process data on startup
download_and_process_data()

# Guard against an empty PREDICTION_DATES (failed download): the original
# len(...) - 1 would give the RangeSlider a maximum of -1.
_SLIDER_MAX = max(len(PREDICTION_DATES) - 1, 0)

# Create Gradio interface
with gr.Blocks(css=CUSTOM_CSS, title="FutureBench Leaderboard") as demo:
    gr.HTML(TITLE)
    with gr.Row():
        gr.Image(
            "image/image.png",
            height=200,
            width=200,
            show_label=False,
            show_download_button=False,
            show_fullscreen_button=False,
            container=False,
            elem_classes="center-logo",
        )
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs():
        with gr.TabItem("🏆 Leaderboard"):
            leaderboard_display = gr.Dataframe(
                value=get_leaderboard(),
                interactive=False,
                wrap=True,
                elem_id="leaderboard-table",
            )
            with gr.Row():
                date_slider = RangeSlider(
                    minimum=0,
                    maximum=_SLIDER_MAX,
                    value=(0, _SLIDER_MAX),
                    step=1,
                    label="📅 Date Range",
                    show_label=True,
                    labels=[str(d) for d in PREDICTION_DATES],
                )
            # Update leaderboard when date range is changed
            date_slider.change(get_leaderboard, inputs=date_slider, outputs=leaderboard_display)

        with gr.TabItem("📊 Summary"):
            summary_display = gr.Markdown(get_data_summary(), elem_classes="markdown-text")
            refresh_summary_btn = gr.Button("🔄 Refresh Summary")
            refresh_summary_btn.click(get_data_summary, outputs=summary_display)

        with gr.TabItem("🔍 Sample Data"):
            sample_display = gr.Dataframe(value=get_sample_data(), interactive=False, wrap=True)
            refresh_sample_btn = gr.Button("🔄 Refresh Sample")
            refresh_sample_btn.click(get_sample_data, outputs=sample_display)

        with gr.TabItem("📋 About"):
            gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")


if __name__ == "__main__":
    # Periodically restart the space (every 30 minutes) to pick up fresh data.
    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=1800)
    scheduler.start()
    demo.queue(default_concurrency_limit=40).launch()