"""Load leaderboard entries from a JSON file and shape them for display."""

import json
from pathlib import Path

import pandas as pd

# Medal prefixes for the three best-scoring rows, indexed by zero-based rank.
_MEDALS = ('🥇', '🥈', '🥉')


def _rank_label(rank, model_name):
    """Return the model name, prefixed with a medal when rank is 0, 1 or 2."""
    if rank < len(_MEDALS):
        return f'{_MEDALS[rank]} {model_name}'
    return f'{model_name}'


def _to_builtin(value):
    """Convert a NumPy scalar to the matching built-in number; pass others through."""
    # NumPy integer scalars are not accepted by json.dumps, built-in ints are.
    return value.item() if hasattr(value, 'item') else value


def load_leaderboard_from_json(json_path="leaderboard_data.json"):
    """Load the leaderboard entries stored in a JSON file.

    The file is expected to contain a JSON object with a top-level
    ``"leaderboard"`` key.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The value stored under ``"leaderboard"``. An empty list is returned
        (and a message printed) when the file does not exist, is not valid
        JSON, or does not contain that key.
    """
    # Keep the try body limited to the statements that can raise the
    # exceptions handled below.
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"JSON file {json_path} not found")
        return []
    except json.JSONDecodeError:
        print(f"Error decoding JSON file {json_path}")
        return []

    # A well-formed file with the wrong shape is treated like the other
    # failure modes instead of leaking a KeyError/TypeError to the caller.
    if not isinstance(data, dict) or 'leaderboard' not in data:
        print(f"JSON file {json_path} has no 'leaderboard' key")
        return []
    return data['leaderboard']


def create_leaderboard_df(json_path="leaderboard_data.json") -> pd.DataFrame:
    """Build the display-ready leaderboard table.

    Entries are sorted by their ``acc`` value in descending order; the first
    three rows get a medal prefix on the model name.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        A DataFrame with the columns ``Model``, ``Release Date``,
        ``HF Model``, ``MoE``, ``Parameters``, ``Open Source`` and
        ``ACC Score``. An empty DataFrame is returned when no entries could
        be loaded.

    Raises:
        KeyError: If the entries lack one of the fields read here
            (``acc``, ``model``, ``release_date``, ``hf``, ``moe``,
            ``params``, ``open_source``).
    """
    leaderboard_data = load_leaderboard_from_json(json_path)
    if not leaderboard_data:
        return pd.DataFrame()

    df = pd.DataFrame(leaderboard_data)

    # mergesort is stable: entries with equal scores keep their order from
    # the file, so medal assignment is deterministic on ties.
    df = df.sort_values('acc', ascending=False, kind='mergesort').reset_index(drop=True)

    # NOTE(review): the entries' ``link`` field is not used in the label;
    # the model name is shown as plain text with an optional medal.
    display_df = pd.DataFrame({
        'Model': [_rank_label(rank, name) for rank, name in enumerate(df['model'])],
        'Release Date': df['release_date'],
        # "-" marks an entry without a Hugging Face model.
        'HF Model': df['hf'].apply(lambda x: '🤗' if x != "-" else "-"),
        # "-" is passed through unchanged; any other value is shown by truthiness.
        'MoE': df['moe'].apply(lambda x: '-' if x == '-' else ('✓' if x else '✗')),
        'Parameters': df['params'],
        'Open Source': df['open_source'].apply(lambda x: '✓' if x else '✗'),
        'ACC Score': df['acc'].apply(lambda x: f"{x:.1f}"),
    })
    return display_df


def get_leaderboard_stats(json_path="leaderboard_data.json") -> dict:
    """Compute summary statistics for the leaderboard.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        A dict with the keys ``total_models``, ``open_source_models``,
        ``moe_models``, ``avg_acc``, ``max_acc`` and ``min_acc``. All values
        are built-in Python numbers, so the dict can be passed to
        ``json.dumps``. An empty dict is returned when no entries could be
        loaded.
    """
    leaderboard_data = load_leaderboard_from_json(json_path)
    if not leaderboard_data:
        return {}

    df = pd.DataFrame(leaderboard_data)

    # Only a literal True counts as MoE; placeholders such as "-" do not.
    moe_count = sum(1 for flag in df['moe'].tolist() if flag is True)

    stats = {
        'total_models': len(df),
        'open_source_models': _to_builtin(df['open_source'].sum()),
        'moe_models': moe_count,
        'avg_acc': _to_builtin(df['acc'].mean()),
        'max_acc': _to_builtin(df['acc'].max()),
        'min_acc': _to_builtin(df['acc'].min()),
    }
    return stats