import json
import re

import gradio as gr
import numpy
import pandas as pd

from src.display.css_html_js import custom_css
from src.about import (
    INTRODUCTION_TEXT,
    TITLE,
    AUTHORS,
)
from src.display.formatting import make_clickable_model
from plot_results import create_performance_plot

# Leaderboard app: reads benchmark_report.json, derives an overall "Avg" score,
# renders a colour-graded table plus a performance plot, and serves the page.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Load the benchmark results. The encoding is given explicitly so that
    # decoding does not depend on the platform's default locale.
    with open("benchmark_report.json", "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # Build the dataframe and rename the raw columns for consistency.
    leaderboard_df = pd.DataFrame(json_data).rename(columns={
        "Model Name": "Model Path",
        "Model Size": "Params",
    })

    # Overall benchmark score: mean of "Avg (object)" and "Avg (country)".
    leaderboard_df["Avg"] = (
        leaderboard_df["Avg (object)"] + leaderboard_df["Avg (country)"]
    ) / 2

    # Columns shown in the table, in display order.
    display_columns = [
        "Model Path", "Params", "Avg",
        "Avg (object)", "Avg (country)",
        "History (object)", "History (country)",
        "Geography (object)", "Geography (country)",
        "Art & Entertainment (object)", "Art & Entertainment (country)",
        "Culture & Tradition (object)", "Culture & Tradition (country)",
    ]
    # Every displayed column except the model name and the parameter count is
    # a score column; they all share the same colouring and number format.
    score_columns = [
        column for column in display_columns
        if column not in ("Model Path", "Params")
    ]

    # Take an explicit copy so the column assignments below write to an
    # independent frame instead of a slice of the full dataframe.
    leaderboard_df = leaderboard_df[display_columns].copy()

    # "-" marks an unknown parameter count: turn it into NaN and make the
    # whole column a real float column (unparseable values still raise).
    params = leaderboard_df["Params"]
    leaderboard_df["Params"] = pd.to_numeric(
        params.where(params != "-")
    ).astype(float)

    # Best overall score first.
    leaderboard_df = leaderboard_df.sort_values(by="Avg", ascending=False)

    # Print model names and scores to the console before the model column is
    # replaced by the output of make_clickable_model.
    print("\n===== MODEL RESULTS =====")
    print("Avg is calculated as: (Avg (object) + Avg (country)) / 2")
    for model_path, avg_score in zip(leaderboard_df["Model Path"], leaderboard_df["Avg"]):
        print(f"{model_path}: {avg_score:.2f}")
    print("========================\n")

    # Format the model column for display, then give it its display name.
    leaderboard_df["Model Path"] = leaderboard_df["Model Path"].apply(make_clickable_model)
    leaderboard_df = leaderboard_df.rename(columns={"Model Path": "Model"})
    leaderboard_df.to_csv("output.csv")

    # Colour the score columns with RdYlGn. "Params" uses the reversed map,
    # with the colour scale capped at 150 via vmax. The score subset is spelled
    # out so that "Params" is never coloured by the first gradient, whatever
    # its dtype.
    leaderboard_df_styled = leaderboard_df.style.background_gradient(
        cmap="RdYlGn",
        subset=score_columns,
    )
    leaderboard_df_styled = leaderboard_df_styled.background_gradient(
        cmap="RdYlGn_r",
        subset=["Params"],
        vmax=150,
    )

    # Number formatting: whole numbers for "Params", two decimals for scores.
    rounding = {"Params": "{:.0f}"}
    rounding.update({column: "{:.2f}" for column in score_columns})
    leaderboard_df_styled = leaderboard_df_styled.format(rounding)

    # One datatype per displayed column: the model column is markdown and
    # every remaining column is numeric.
    datatypes = ["markdown"] + ["number"] * (len(display_columns) - 1)

    leaderboard_table = gr.components.Dataframe(
        value=leaderboard_df_styled,
        datatype=datatypes,
        elem_id="leaderboard-table",
        interactive=False,
        visible=True,
    )

    # Create and show the performance plot below the table.
    fig = create_performance_plot()
    plot = gr.Plot(value=fig, elem_id="performance-plot")

    gr.Markdown(AUTHORS, elem_classes="markdown-text")

    # csv = gr.File(interactive=False, value="output.csv", visible=True)

# Launch after the Blocks context has been closed: the UI is declared inside
# the `with` block and served once that declaration is complete.
demo.queue(default_concurrency_limit=40).launch()