import json
import os
import re
import uuid
from collections.abc import Hashable
from pathlib import Path

import pandas as pd
import streamlit as st
from datasets import load_dataset
from huggingface_hub import CommitScheduler

from src.check_validity import validate_model

# Page-wide Streamlit settings (browser tab title, wide layout)
st.set_page_config(layout="wide", page_title="IVACE Leaderboard")

# Local staging area for user requests: the folder handed to the upload
# scheduler, and a JSON Lines file inside it whose name carries a fresh UUID
# each time this module body executes
request_folder = Path("user_request/")
request_file = request_folder / f"data_{uuid.uuid4()}.json"


# columns = [
#     "eval_name",
#     "Model",
#     "Type",
#     "Average ⬆️",
#     "IFEval",
#     "MMLU-PRO",
#     "GPQA",
#     "MUSR",
#     "CO₂ cost (kg)",
# ]
# Labels of the per-language leaderboard tabs; each label is also used to
# select the result columns that belong to that tab
lang_list = [
    "Spain",
    "Portuguese",
    "English",
    "Spanish",
    "Costa Rica",
    "Mexico",
    "Peru",
    "Uruguay",
    "Basque",
    "Catalan",
    "Galician",
]

# Model-describing columns, always shown first and in this order
model_columns = [
    "model_name",
    "url",
    "type",
]

@st.cache_resource
def _get_scheduler() -> CommitScheduler:
    """Build the Hub upload scheduler once and reuse it across script reruns.

    Streamlit re-executes this script on every user interaction. Without the
    cache each rerun would construct another ``CommitScheduler`` (and with it
    another background upload job and another, unrelated ``lock``), so the
    jobs would pile up and the lock would not actually be shared between
    sessions.

    Returns:
        The single ``CommitScheduler`` that pushes ``request_folder`` to the
        private ``iberbench/ivace-user-request`` dataset repo under ``data``.
    """
    return CommitScheduler(
        repo_id="iberbench/ivace-user-request",
        repo_type="dataset",
        private=True,
        folder_path=request_folder,
        token=st.secrets["HF_TOKEN"],
        path_in_repo="data",
        every=10,  # upload interval (minutes, per the huggingface_hub docs)
    )


scheduler = _get_scheduler()


def log_submission(input_dict: dict) -> None:
    """
    Serialize one submission as JSON and append it, as a single line, to the request file.

    The file is opened and written while holding ``scheduler.lock``.
    """
    with scheduler.lock, request_file.open("a") as sink:
        sink.write(json.dumps(input_dict) + "\n")


# def get_url(html_content: str) -> str:
#     match = re.search(r'href=["\'](https?://[^\s"\']+)', html_content)
#     if match:
#         url = match.group(1)
#         return url
#     else:
#         raise ValueError("Url not found in the link")


def get_lang_columns(columns: list, lang: str):
    """Return the entries of ``columns`` whose name mentions ``lang``.

    The label is lower-cased and its spaces become underscores; a column is
    kept when that normalized label occurs anywhere in the column name
    (case-sensitive substring test). The input order is preserved.
    """
    needle = "_".join(lang.lower().split(" "))

    selected = []
    for name in columns:
        if needle in name:
            selected.append(name)
    return selected


@st.cache_data
def load_data(lang: str) -> pd.DataFrame:
    """Load the evaluation results from the Hub, restricted to one language.

    Args:
        lang: language label; only task columns matched by
            ``get_lang_columns`` for this label are kept.

    Returns:
        A frame holding ``model_columns`` followed by the language's task
        columns, plus a boolean ``"Active"`` column (initially ``False``)
        used later for search filtering. An empty frame is returned, and an
        error is shown, when the lookup raises ``FileNotFoundError``.
    """
    try:
        data = load_dataset(
            "iberbench/lm-eval-results-ac", token=st.secrets["HF_TOKEN"]
        )["train"].to_pandas()
    except FileNotFoundError:
        st.error("iberbench/lm-eval-results-ac was not found in the hub")
        return pd.DataFrame()

    # filter lang columns
    task_columns = [col for col in data.columns if col not in model_columns]
    task_lang_columns = get_lang_columns(task_columns, lang)
    # take an explicit copy: adding a column to a column-subset selection
    # otherwise triggers pandas' SettingWithCopyWarning
    data = data[model_columns + task_lang_columns].copy()

    # data["Model"] = data["Model"].apply(get_url)
    # data.sort_values(by="Average ⬆️", ascending=False, inplace=True)
    # data.reset_index(drop=True, inplace=True)

    # add column to apply filtering
    data["Active"] = False

    return data


# functions to create filter
def active_data(lang) -> pd.DataFrame:
    """Return a copy of the stored leaderboard rows whose "Active" flag is True.

    The frame is read from ``st.session_state`` under
    ``leaderboard_data_<lang>``; the stored frame itself is not modified.
    """
    board = st.session_state[f"leaderboard_data_{lang}"]
    is_active = board["Active"].eq(True)
    return board.loc[is_active].copy()


def get_index(lang, row) -> Hashable:
    """Map a row position in the active (filtered) table to its index label.

    Args:
        lang: language label selecting which leaderboard frame to look at.
        row: zero-based position of the row within ``active_data(lang)``.

    Returns:
        The index label of that row (the ``name`` of the row Series), which
        can be used to address the same row in the unfiltered frame.
    """
    return active_data(lang).iloc[row].name


def commit(lang) -> None:
    """Write the pending table edits for ``lang`` back into the stored frame.

    Reads the ``edited_rows`` mapping of the ``edited_data_<lang>`` widget
    state (row position -> {column: new value}), resolves each position to
    its index label and stores every new value in
    ``leaderboard_data_<lang>``.
    """
    pending = st.session_state[f"edited_data_{lang}"]["edited_rows"]
    for position, changes in pending.items():
        label = get_index(lang, position)
        for column, new_value in changes.items():
            st.session_state[f"leaderboard_data_{lang}"].at[label, column] = new_value


def create_search_per_language(lang: str, search_dict: dict) -> None:
    """Render the search box and the editable results table for one language.

    Args:
        lang: language label; selects the session-state keys and the task
            columns that are displayed.
        search_dict: mutable mapping that receives the current search text
            under the key ``lang``.

    The "Active" column of the stored frame is recomputed from the search
    text on every call: all rows are active for an empty search, otherwise
    only rows whose ``model_name`` contains the text (case-insensitive).
    """
    board = st.session_state[f"leaderboard_data_{lang}"]
    if board.empty:
        st.write("No data found to display on leaderboard.")
        return

    query = st.text_input(
        "Search for ...",
        key=f"search_input_{lang}",
        on_change=commit,
        kwargs={"lang": lang},
    )
    search_dict[lang] = query

    if query == "":
        board["Active"] = True
    else:
        # regex=False: treat what the user typed literally, so characters such
        # as "(" or "[" cannot raise a regex error and "." matches only a dot.
        # na=False: rows with a missing model name simply do not match instead
        # of producing an NA mask.
        board["Active"] = board["model_name"].str.contains(
            query, case=False, regex=False, na=False
        )

    # select columns to display
    task_columns = [col for col in board.columns if col not in model_columns]
    columns = model_columns + get_lang_columns(task_columns, lang)

    st.data_editor(
        active_data(lang),
        column_order=columns,
        key=f"edited_data_{lang}",
        hide_index=False,
        # column_config={"Model": st.column_config.LinkColumn("Model")},
        column_config={"url": st.column_config.LinkColumn("url")},
    )


# streamlit UI
for lang in lang_list:
    # todo: load a different dataset per language or load different columns per lang
    state_key = f"leaderboard_data_{lang}"
    leaderboard_data = load_data(lang)
    # keep whatever frame the session already holds; only seed missing ones
    if state_key in st.session_state:
        continue
    st.session_state[state_key] = leaderboard_data

# per-language search text, filled in while the leaderboard tab is rendered
search_dict = {}
tabs = st.tabs(["Leaderboard", "Submit model"])

with tabs[0]:
    # logo image, shown in the third of five columns
    _, _, logo_col, _, _ = st.columns(5, vertical_alignment="center")
    with logo_col:
        st.image("assets/images/hf-logo.png", use_container_width=True)

    # title
    title_html = """
        <div style="text-align: center;">
            <h1>IVACE LLM Leaderboard</h1>
            <p style="font-size: 1.2rem;">
                Comparing Large Language Models in an <span style="font-weight: 600;">open</span> 
                and <span style="font-weight: 600;">reproducible</span> way
            </p>
        </div>
        """
    st.markdown(title_html, unsafe_allow_html=True)

    # one tab per language, each with its own search box and results table
    lang_tabs = st.tabs(lang_list)

    for position, lang in enumerate(lang_list):
        with lang_tabs[position]:
            create_search_per_language(lang, search_dict)


with tabs[1]:
    st.header("Submit model")

    def get_id_number(id_val):
        """Return the HTML for a circled step number showing ``id_val``."""
        # NOTE(review): the outer flex container opened here is not closed in
        # this template; presumably each guide snippet appended after it
        # closes it — confirm against the files in assets/html.
        html_template = f"""
        <div style="display: flex; align-items: flex-start; margin-bottom: 1rem;">
            <div style="
                width: 32px; 
                height: 32px; 
                border-radius: 50%; 
                display: flex; 
                align-items: center; 
                justify-content: center; 
                border: 1px solid #007BFF; 
                color: #007BFF; 
                font-size: 0.875rem; 
                font-weight: 600; 
                background-color: transparent;">
                {id_val}
            </div>"""
        return html_template

    # create guide info
    guide_info_list = []
    html_path = "assets/html"
    # os.listdir yields entries in arbitrary order; sort by filename so the
    # numbered steps below always appear in the same, predictable order
    for filename in sorted(os.listdir(html_path)):
        file_path = os.path.join(html_path, filename)
        with open(file_path, "r", encoding="utf-8") as file:
            guide_info_list.append(file.read())

    # display adding number id
    for i, info_div in enumerate(guide_info_list):
        st.markdown(get_id_number(i + 1) + info_div, unsafe_allow_html=True)

    with st.form("submit_model_form"):
        model_name = st.text_input(
            "Model Name (format: user_name/model_name)",
            help="Your model should be public on the Hub and follow the username/model-id format (e.g. mistralai/Mistral-7B-v0.1).",
        )
        description = st.text_area(
            "Description",
            help="Add a description of the proposed model for the evaluation to help prioritize its evaluation",
        )
        user_contact = st.text_input(
            "Your Contact Email",
            help="User e-mail to contact when there are updates",
        )
        precision_option = st.selectbox(
            "Choose precision format:",
            help="Size limits vary by precision: • FP16/BF16: up to 100B parameters • 8-bit: up to 280B parameters (2x) • 4-bit: up to 560B parameters (4x) Choose carefully as incorrect precision can cause evaluation errors.",
            options=["float16", "bfloat16", "8bit", "4bit", "GPTQ"],
            index=0,
        )
        weight_type_option = st.selectbox(
            "Select what type of weights are being loaded from the checkpoint provided:",
            help="Original: Complete model weights in safetensors format Delta: Weight differences from base model (requires base model for size calculation) Adapter: Lightweight fine-tuning layers (requires base model for size calculation)",
            options=["Original", "Adapter", "Delta"],
            index=0,
        )
        base_model_name = st.text_input(
            "Base model",
            help="Required for delta weights or adapters. This information is used to identify the original model and calculate the total parameter count by combining base model and adapter/delta parameters.",
            value="",
        )
        model_type = st.selectbox(
            "Choose model type:",
            help="🟢 Pretrained: Base models trained on text using masked modeling 🔶 Fine-tuned: Domain-specific optimization 💬 Chat: Models using RLHF, DPO, or IFT for conversation 🤝 Merge: Combined weights without additional training",
            options=[
                "🟢 Pretrained",
                "🔶 Fine-tuned",
                "💬 Chat",
                "🤝 Merge",
            ],
        )
        submit_button = st.form_submit_button("Submit Request")

        if submit_button:
            # validate model size, license, chat_templates
            use_chat_template = model_type == "💬 Chat"
            validation_error = validate_model(
                model_name,
                precision_option,
                base_model_name,
                weight_type_option,
                use_chat_template,
            )
            if validation_error is not None:
                st.error(validation_error)
            # fullmatch: the whole input must look like an address; re.match
            # only anchors the start and would accept e.g. "a@b.c@d"
            elif not re.fullmatch(r"[^@]+@[^@]+\.[^@]+", user_contact):
                st.error("Invalid email address.")
            else:
                input_dict = {
                    "model_name": model_name,
                    "description": description,
                    "user_contact": user_contact,
                    "precision_option": precision_option,
                    "weight_type_option": weight_type_option,
                    "base_model_name": base_model_name,
                    "model_type": model_type,
                }
                # top-level UI boundary: report any failure to the user
                # instead of crashing the page
                try:
                    log_submission(input_dict)
                    st.success("Your request has been sent successfully.")
                except Exception as e:
                    st.error(
                        f"Failed to send your request: {e}. Please try again later."
                    )