"""Plotly chart builders for the game leaderboards.

Every public ``create_*`` function returns a ``plotly.graph_objects.Figure``
(``create_horizontal_bar_chart`` may return ``None`` for an unknown game).
Scores are normalised per game onto a 0-100 scale before being drawn on
radar or grouped-bar charts; see ``normalize_values``.
"""

import json

import numpy as np
import pandas as pd
import plotly.graph_objects as go

from leaderboard_utils import (
    get_organization,
    get_mario_leaderboard,
    get_sokoban_leaderboard,
    get_2048_leaderboard,
    get_candy_leaderboard,
    get_tetris_leaderboard,
    get_tetris_planning_leaderboard,
    get_combined_leaderboard,
    GAME_ORDER,
)

# Load model colors
with open('assets/model_color.json', 'r', encoding='utf-8') as f:
    MODEL_COLORS = json.load(f)

# Column of each per-game leaderboard that holds the value to plot.
GAME_SCORE_COLUMNS = {
    "Super Mario Bros": "Score",
    "Sokoban": "Levels Cracked",
    "2048": "Score",
    "Candy Crash": "Average Score",
    "Tetris (complete)": "Score",
    "Tetris (planning only)": "Score"
}

# Colour used for any player that has no entry in MODEL_COLORS.
_FALLBACK_COLOR = '#808080'


def get_model_prefix(name):
    """Return the part of *name* before its first ``'-'``."""
    return name.split('-')[0]


def normalize_values(values, mean, std):
    """Map raw scores onto a 0-100 scale using a clamped z-score.

    Each value becomes ``z * 30 + 50`` (``z = (v - mean) / std``), clamped
    to ``[0, 100]``.

    Args:
        values: Iterable of numeric scores.
        mean: Mean used for the z-score.
        std: Standard deviation used for the z-score.

    Returns:
        A list with one normalised number per input value. When ``std`` is
        zero or NaN, positive values map to 50 and everything else to 0.
    """
    # A sample standard deviation over a single row is NaN. Without this
    # guard the NaN z-scores fall through the min/max clamp as 100, so a
    # lone player would be drawn at full scale on every axis.
    if std == 0 or pd.isna(std):
        return [50 if v > 0 else 0 for v in values]
    z_scores = [(v - mean) / std for v in values]
    return [max(0, min(100, (z * 30) + 50)) for z in z_scores]


def simplify_model_name(name):
    """Shorten a model name for use as a chart label.

    Names with more than four ``'-'``-separated parts are cut to the first
    four parts followed by ``'-...'``; other names are returned unchanged.
    One specific Claude "thinking" identifier is first rewritten to a short
    alias.
    """
    if name == "claude-3-7-sonnet-20250219(thinking)":
        name = "claude-3-7-thinking"
    parts = name.split('-')
    return '-'.join(parts[:4]) + '-...' if len(parts) > 4 else name


def hex_to_rgba(hex_color, alpha=0.2):
    """Convert a hex colour string to a CSS ``rgba(r, g, b, alpha)`` string.

    Args:
        hex_color: ``'#rrggbb'`` (leading ``'#'`` optional). The 3-digit
            shorthand ``'#rgb'`` is also accepted and expanded.
        alpha: Opacity to embed in the result.

    Raises:
        ValueError: If the string does not contain valid hex digits.
    """
    digits = hex_color.lstrip('#')
    if len(digits) == 3:
        # Expand shorthand: 'abc' -> 'aabbcc'.
        digits = ''.join(ch * 2 for ch in digits)
    r, g, b = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f'rgba({r}, {g}, {b}, {alpha})'


def _max_sokoban_level(levels_str):
    """Return the highest level in a ``';'``-separated string, or 0.

    Empty entries are ignored. Input that is not a string, or that contains
    a non-integer entry, yields 0.
    """
    try:
        levels = [int(x.strip()) for x in levels_str.split(";") if x.strip()]
    except (AttributeError, TypeError, ValueError):
        return 0
    return max(levels) if levels else 0


def _score_series(df, col):
    """Return column *col* as floats, treating the ``"_"`` placeholder as 0."""
    return df[col].replace("_", 0).astype(float)


def _with_normalized_scores(df, game_cols, reference=None):
    """Return a copy of *df* with a ``norm_<col>`` column per game column.

    Args:
        df: Frame whose rows are normalised. It is not modified.
        game_cols: Names of the score columns to normalise.
        reference: Optional frame supplying the mean and standard deviation.
            When omitted, the statistics come from *df* itself.
    """
    stats_source = df if reference is None else reference
    result = df.copy()
    for col in game_cols:
        ref_vals = _score_series(stats_source, col)
        vals = ref_vals if reference is None else _score_series(df, col)
        result[f"norm_{col}"] = normalize_values(
            vals, ref_vals.mean(), ref_vals.std()
        )
    return result


def _closed(seq):
    """Return *seq* as a list with its first item appended (if any).

    Radar traces repeat their first point so the outline closes; slicing
    keeps this safe for an empty sequence.
    """
    items = list(seq)
    return items + items[:1]


def _radar_trace(values, categories, name, **style):
    """Build one filled, closed ``Scatterpolar`` trace."""
    return go.Scatterpolar(
        r=_closed(values),
        theta=_closed(categories),
        mode='lines+markers',
        fill='toself',
        name=name,
        **style
    )


def _apply_layout(fig, title, left, bottom, legend_x, **extra):
    """Apply the fixed 800x600 layout shared by every chart in this module.

    Args:
        fig: Figure to update in place.
        title: Title text.
        left: Left margin in pixels.
        bottom: Bottom margin in pixels.
        legend_x: Horizontal legend position.
        **extra: Additional ``update_layout`` keyword arguments.

    Returns:
        The same figure, for convenience.
    """
    fig.update_layout(
        autosize=False,
        width=800,
        height=600,
        margin=dict(l=left, r=150, t=40, b=bottom),
        title=dict(text=title, pad=dict(t=10)),
        legend=dict(
            font=dict(size=9),
            itemsizing='trace',
            x=legend_x,
            y=1,
            xanchor='left',
            yanchor='top',
            bgcolor='rgba(255,255,255,0.6)',
            bordercolor='gray',
            borderwidth=1
        ),
        **extra
    )
    return fig


def _radar_axes():
    """Return the polar-axis settings used by every radar chart (0-100)."""
    return dict(radialaxis=dict(visible=True, range=[0, 100]))


def _combined_game_columns(df):
    """Return the ``"<game> Score"`` columns of *df*, in GAME_ORDER order."""
    return [f"{g} Score" for g in GAME_ORDER if f"{g} Score" in df.columns]


def create_horizontal_bar_chart(df, game_name):
    """Build a horizontal bar chart of one game's leaderboard.

    Args:
        df: Leaderboard frame with ``Player`` and ``Organization`` columns
            plus the game's score column (see ``GAME_SCORE_COLUMNS``).
            It is not modified.
        game_name: One of the keys of ``GAME_SCORE_COLUMNS``.

    Returns:
        The figure, or ``None`` when *game_name* is not a known game.
    """
    if game_name not in GAME_SCORE_COLUMNS:
        return None

    score_col = GAME_SCORE_COLUMNS[game_name]
    if game_name == "Sokoban":
        # Sokoban stores a ';'-separated list of levels; plot the highest.
        # assign() yields a new frame so the caller's df gains no column.
        df = df.assign(**{'Max Level': df[score_col].apply(_max_sokoban_level)})
        score_col = 'Max Level'
    df_sorted = df.sort_values(by=score_col, ascending=True)

    x = df_sorted[score_col]
    players = df_sorted['Player']
    y = [
        f"{simplify_model_name(player)} [{org}]"
        for player, org in zip(players, df_sorted['Organization'])
    ]
    colors = [MODEL_COLORS.get(player, _FALLBACK_COLOR) for player in players]
    # Candy Crash scores are averages, so keep one decimal place for them.
    texts = [
        f"{v:.1f}" if game_name == "Candy Crash" else f"{int(v)}" for v in x
    ]

    fig = go.Figure(go.Bar(
        x=x,
        y=y,
        orientation='h',
        marker_color=colors,
        text=texts,
        textposition='auto',
        # <br> is how a Plotly hover template breaks a line; a raw newline
        # inside the quoted literal is not valid Python.
        hovertemplate='%{y}<br>Score: %{x}'
    ))
    return _apply_layout(
        fig,
        f"{game_name} Performance",
        left=150,
        bottom=200,
        legend_x=1.1,
        yaxis=dict(automargin=True)
    )


def create_radar_charts(df):
    """Build a radar chart with one trace per row of a combined leaderboard.

    Every column whose name ends in ``" Score"`` becomes an axis. *df* is
    not modified.
    """
    game_cols = [c for c in df.columns if c.endswith(" Score")]
    categories = [c.replace(" Score", "") for c in game_cols]
    normalized = _with_normalized_scores(df, game_cols)

    fig = go.Figure()
    for _, row in normalized.iterrows():
        player = row["Player"]
        color = MODEL_COLORS.get(player, _FALLBACK_COLOR)
        fig.add_trace(_radar_trace(
            [row[f"norm_{c}"] for c in game_cols],
            categories,
            player,
            line=dict(color=color, width=2),
            marker=dict(color=color),
            # ~20% opaque fill. Written as rgba() rather than by appending
            # an alpha byte to the hex string, matching
            # create_single_radar_chart.
            fillcolor=hex_to_rgba(color, 0.2),
            opacity=0.8
        ))

    return _apply_layout(
        fig,
        "Radar Chart of AI Performance (Normalized)",
        left=80,
        bottom=100,
        legend_x=1.4,
        polar=_radar_axes()
    )


def get_combined_leaderboard_with_radar(rank_data, selected_games):
    """Return ``(combined leaderboard, radar chart)`` for the selection."""
    df = get_combined_leaderboard(rank_data, selected_games)
    # create_radar_charts works on its own copy, so df is returned untouched.
    return df, create_radar_charts(df)


def create_group_bar_chart(df):
    """Build a grouped bar chart of normalised scores, one trace per row.

    Games appear in ``GAME_ORDER`` order, limited to those with a
    ``"<game> Score"`` column in *df*. *df* is not modified.
    """
    active_games = [g for g in GAME_ORDER if f"{g} Score" in df.columns]
    game_cols = [f"{g} Score" for g in active_games]
    normalized = _with_normalized_scores(df, game_cols)

    fig = go.Figure()
    for _, row in normalized.iterrows():
        player = row["Player"]
        fig.add_trace(go.Bar(
            name=simplify_model_name(player),
            x=active_games,
            y=[row[f"norm_{col}"] for col in game_cols],
            marker_color=MODEL_COLORS.get(player, _FALLBACK_COLOR)
        ))

    return _apply_layout(
        fig,
        "Grouped Bar Chart of AI Models",
        left=80,
        bottom=200,
        legend_x=1.1,
        xaxis_title="Games",
        yaxis_title="Normalized Score",
        barmode='group'
    )


def get_combined_leaderboard_with_group_bar(rank_data, selected_games):
    """Return ``(combined leaderboard, grouped bar chart)`` for the selection."""
    df = get_combined_leaderboard(rank_data, selected_games)
    # create_group_bar_chart works on its own copy of df.
    return df, create_group_bar_chart(df)


def create_single_radar_chart(df, selected_games=None, highlight_models=None):
    """Build a radar chart over chosen games with optional highlighting.

    Args:
        df: Combined leaderboard with a ``Player`` column and a
            ``"<game> Score"`` column for every selected game. It is not
            modified.
        selected_games: Game names to use as axes. Defaults to Super Mario
            Bros, 2048, Candy Crash and Sokoban.
        highlight_models: Optional collection of player names to draw in
            red with a thicker, fully opaque line.

    Returns:
        The figure. Traces are ordered by model prefix, then by name.

    Raises:
        KeyError: If a selected game has no score column in *df*.
    """
    if selected_games is None:
        selected_games = ['Super Mario Bros', '2048', 'Candy Crash', 'Sokoban']
    # list() so a tuple (or other iterable) of game names also works.
    categories = list(selected_games)
    game_cols = [f"{game} Score" for game in categories]
    normalized = _with_normalized_scores(df, game_cols)

    # First row seen for each player; a repeated name reuses that row.
    first_rows = {}
    for _, row in normalized.iterrows():
        first_rows.setdefault(row["Player"], row)

    # Group by prefix, then alphabetically within each group.
    ordered_players = sorted(
        normalized["Player"], key=lambda p: (get_model_prefix(p), p)
    )

    fig = go.Figure()
    for player in ordered_players:
        row = first_rows[player]
        is_highlighted = bool(highlight_models) and player in highlight_models
        if is_highlighted:
            color = 'red'
            fillcolor = 'rgba(255, 0, 0, 0.3)'
        else:
            color = MODEL_COLORS.get(player, _FALLBACK_COLOR)
            fillcolor = hex_to_rgba(color, 0.2)

        fig.add_trace(_radar_trace(
            [row[f"norm_{col}"] for col in game_cols],
            categories,
            simplify_model_name(player),
            line=dict(color=color, width=4 if is_highlighted else 2),
            marker=dict(color=color),
            fillcolor=fillcolor,
            opacity=1.0 if is_highlighted else 0.7
        ))

    return _apply_layout(
        fig,
        "Single Radar Chart (Normalized Performance)",
        left=80,
        bottom=100,
        legend_x=1.4,
        polar=_radar_axes()
    )


def get_combined_leaderboard_with_single_radar(rank_data, selected_games,
                                               highlight_models=None):
    """Return ``(combined leaderboard, single radar chart)``.

    Args:
        rank_data: Passed through to ``get_combined_leaderboard``.
        selected_games: Mapping of game name to a truthy/falsy flag; only
            games with a truthy flag become radar axes.
        highlight_models: Forwarded to ``create_single_radar_chart``.
    """
    df = get_combined_leaderboard(rank_data, selected_games)
    selected_game_names = [g for g, sel in selected_games.items() if sel]
    # create_single_radar_chart works on its own copy of df.
    return df, create_single_radar_chart(
        df, selected_game_names, highlight_models
    )


def create_organization_radar_chart(rank_data):
    """Build a radar chart of per-organization mean scores (normalised).

    Scores are averaged within each organization over all games in
    ``GAME_ORDER``, then normalised across organizations.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    game_cols = _combined_game_columns(df)
    categories = [g.replace(" Score", "") for g in game_cols]

    records = []
    for org in df["Organization"].unique():
        org_rows = df[df["Organization"] == org]
        record = {col: _score_series(org_rows, col).mean() for col in game_cols}
        record["Organization"] = org
        records.append(record)
    # Naming the columns keeps an empty leaderboard from producing a frame
    # with no columns at all.
    avg_df = pd.DataFrame(records, columns=[*game_cols, "Organization"])
    avg_df = _with_normalized_scores(avg_df, game_cols)

    fig = go.Figure()
    for _, row in avg_df.iterrows():
        fig.add_trace(_radar_trace(
            [row[f"norm_{col}"] for col in game_cols],
            categories,
            row["Organization"]
        ))

    return _apply_layout(
        fig,
        "Radar Chart: Organization Performance (Normalized)",
        left=80,
        bottom=200,
        legend_x=1.4,
        polar=_radar_axes()
    )


def create_top_players_radar_chart(rank_data, n=5):
    """Build a radar chart for the first *n* players of the combined board.

    Scores are normalised among the selected players only.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    top_players = df.head(n)["Player"].tolist()
    game_cols = _combined_game_columns(df)
    categories = [g.replace(" Score", "") for g in game_cols]

    # The helper copies the filtered frame before adding columns, so no
    # assignment is made onto a slice of df.
    top_df = _with_normalized_scores(
        df[df["Player"].isin(top_players)], game_cols
    )

    fig = go.Figure()
    for _, row in top_df.iterrows():
        fig.add_trace(_radar_trace(
            [row[f"norm_{col}"] for col in game_cols],
            categories,
            simplify_model_name(row["Player"])
        ))

    return _apply_layout(
        fig,
        f"Top {n} Players Radar Chart (Normalized)",
        left=80,
        bottom=200,
        legend_x=1.4,
        polar=_radar_axes()
    )


def create_player_radar_chart(rank_data, player_name):
    """Build a radar chart for a single player.

    The player's scores are normalised against the mean and standard
    deviation of the whole combined leaderboard. If *player_name* is not on
    the leaderboard, an 800x400 figure titled "Player not found" is
    returned instead.
    """
    df = get_combined_leaderboard(rank_data, {g: True for g in GAME_ORDER})
    player_rows = df[df["Player"] == player_name]

    if player_rows.empty:
        return go.Figure().update_layout(
            title=dict(text="Player not found", pad=dict(t=10)),
            autosize=False,
            width=800,
            height=400
        )

    game_cols = _combined_game_columns(df)
    categories = [g.replace(" Score", "") for g in game_cols]
    player_df = _with_normalized_scores(player_rows, game_cols, reference=df)

    fig = go.Figure()
    for _, row in player_df.iterrows():
        fig.add_trace(_radar_trace(
            [row[f"norm_{col}"] for col in game_cols],
            categories,
            simplify_model_name(row["Player"])
        ))

    return _apply_layout(
        fig,
        f"{simplify_model_name(player_name)} Radar Chart (Normalized)",
        left=80,
        bottom=200,
        legend_x=1.4,
        polar=_radar_axes()
    )


def save_visualization(fig, filename):
    """Write *fig* to *filename* using the figure's ``write_image`` method."""
    fig.write_image(filename)