import json
from pathlib import Path
from typing import Dict, List

import meeteval.io
import pandas as pd

from txt_norm import get_text_norm
from utils import calc_wer, aggregate_wer_metrics


class LeaderboardServer:
    """Manages ASR model submissions and leaderboard generation."""

    def __init__(self,
                 reference_base_path: str = "references",
                 tasks_metadata_path: str = "tasks_metadata.json",
                 local_leaderboard_path: str = "submissions"):
        """Initialize the leaderboard server.

        Args:
            reference_base_path: Base path for reference files
            tasks_metadata_path: Path to tasks metadata JSON file
            local_leaderboard_path: Directory for storing submissions
        """
        self.reference_base_path = Path(reference_base_path).resolve()
        self.tasks_metadata_path = Path(tasks_metadata_path).resolve()
        self.local_leaderboard = Path(local_leaderboard_path).resolve()

        # Load tasks metadata
        self.tasks_metadata = self._load_tasks_metadata()

        # Initialize storage
        self.local_leaderboard.mkdir(exist_ok=True)
        self.text_normalizer = get_text_norm("whisper_nsf")

    def _load_tasks_metadata(self) -> Dict:
        """Load tasks metadata from JSON file."""
        try:
            with open(self.tasks_metadata_path) as f:
                return json.load(f)["tasks"]
        except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
            raise ValueError(f"Failed to load tasks metadata: {e}")

    def _get_results_file_path(self, task: str) -> Path:
        """Get the path to the results file for a specific task."""
        return self.local_leaderboard / f"{task}_results.json"

    def _create_submission_id(self, metadata: Dict[str, str]) -> str:
        """Create a unique submission ID from metadata."""
        return f"{metadata['submitted_by']}_{metadata['model_id']}"

    def _normalize_text_if_needed(self, segment: Dict, normalize: bool) -> Dict:
        """Apply text normalization to a segment if requested."""
        if normalize:
            return {**segment, "words": self.text_normalizer(segment["words"])}
        return segment

    def _evaluate_dataset(self, hyp_seglst, ref_seglst, normalize: bool = False) -> Dict:
        """Evaluate WER for a single dataset."""
        # Apply normalization if requested
        if normalize:
            ref_seglst = ref_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))
            hyp_seglst = hyp_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))

        # Calculate WER metrics
        per_session_wers = calc_wer(
            tcp_hyp_seglst=hyp_seglst,
            ref_seglst=ref_seglst,
            collar=5,
            metrics_list=["tcp_wer"]
        )
        return aggregate_wer_metrics(per_session_wers, ["tcp_wer"])

    def _load_existing_results(self, task: str) -> Dict:
        """Load existing results for a task, or return empty dict."""
        results_path = self._get_results_file_path(task)
        if results_path.exists():
            with open(results_path) as f:
                return json.load(f)
        return {}

    def _save_results(self, task: str, results: Dict) -> None:
        """Save results to the task results file."""
        results_path = self._get_results_file_path(task)
        with open(results_path, "w") as f:
            json.dump(results, f, indent=2)

    def _save_hypothesis_file(self, task: str, submission_id: str, source_file: str) -> None:
        """Save the hypothesis file for future reference."""
        hyp_filename = f"{task}_{submission_id}_hyp.json"
        hyp_filepath = self.local_leaderboard / hyp_filename
        with open(hyp_filepath, "w") as out_f:
            with open(source_file, "r") as in_f:
                out_f.write(in_f.read())

    def prepare_model_for_submission(self, file: str, metadata: Dict[str, str], task: str,
                                     datasets: List[str], normalize: bool = False) -> None:
        """Prepare and evaluate a model submission.
        Args:
            file: Path to the hypothesis file
            metadata: Submission metadata containing 'submitted_by', 'model_id' and 'model_link'
            task: Task name
            datasets: List of dataset names to evaluate on
            normalize: Whether to apply text normalization
        """
        submission_id = self._create_submission_id(metadata)

        # Load hypothesis segments
        hyp_seglst = meeteval.io.load(file)

        # Evaluate on each dataset
        results = {}
        for dataset in datasets:
            ref_path = self.reference_base_path / task / f"{dataset}.json"
            if not ref_path.exists():
                raise FileNotFoundError(f"Reference file not found: {ref_path}")

            ref_seglst = meeteval.io.load(ref_path)
            sessions = ref_seglst.unique('session_id')

            # Filter hypotheses to match reference sessions
            local_hyps = hyp_seglst.filter(lambda seg: seg['session_id'] in sessions)

            if "alimeeting" in dataset or "aishell4" in dataset:
                # Chinese datasets: convert simplified to traditional characters and
                # space-separate characters so WER is computed at character level
                import opencc
                converter = opencc.OpenCC('s2t.json')
                local_hyps = local_hyps.map(
                    lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})
                ref_seglst = ref_seglst.map(
                    lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})

            # Evaluate this dataset
            results[dataset] = self._evaluate_dataset(local_hyps, ref_seglst, normalize)

        # Update results file
        all_results = self._load_existing_results(task)
        all_results[submission_id] = {
            "model_link": metadata["model_link"],
            "model_id": metadata["model_id"],
            "submitted_by": metadata["submitted_by"],
            "results": results
        }
        self._save_results(task, all_results)
        self._save_hypothesis_file(task, submission_id, file)

    @staticmethod
    def make_clickable_model(model_name, link):
        """Render the model name as an HTML link for the leaderboard table."""
        return f'<a target="_blank" href="{link}">{model_name}</a>'

    def get_leaderboard(self, task: str) -> pd.DataFrame:
        """Generate leaderboard DataFrame for a specific task.

        Args:
            task: Task name

        Returns:
            DataFrame containing leaderboard results
        """
        results_path = self._get_results_file_path(task)
        if not results_path.exists():
            return pd.DataFrame(columns=["No submissions yet"])

        with open(results_path) as f:
            results = json.load(f)

        if not results:
            return pd.DataFrame(columns=["No submissions yet"])

        # Build rows for DataFrame
        rows = []
        for content in results.values():
            row = {
                "Model ID": self.make_clickable_model(content["model_id"], content["model_link"]),
                "Submitted by": content["submitted_by"]
            }
            # Add dataset results
            for dataset, metrics in content["results"].items():
                row[dataset] = metrics.get("tcp_wer")
            rows.append(row)

        df = pd.DataFrame(rows)
        if df.empty:
            return df

        # Convert WER to percentage and format
        numeric_columns = df.select_dtypes(include=['number']).columns
        df[numeric_columns] *= 100.0
        df = df.round(2)
        df = df.fillna("-")
        return df
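

# Minimal usage sketch, not part of the server itself. The task name, dataset
# name, metadata values, and file paths below are hypothetical; it assumes the
# hypothesis and reference files are SegLST JSON files that meeteval.io.load
# can read, and that references/<task>/<dataset>.json exists on disk.
if __name__ == "__main__":
    server = LeaderboardServer(
        reference_base_path="references",
        tasks_metadata_path="tasks_metadata.json",
        local_leaderboard_path="submissions",
    )

    # Evaluate one submission on a single dataset of a task and store the results.
    server.prepare_model_for_submission(
        file="my_model_hyp.json",                # hypothetical hypothesis file
        metadata={
            "submitted_by": "example_user",      # hypothetical submitter
            "model_id": "my-asr-model",          # hypothetical model ID
            "model_link": "https://example.com", # hypothetical model page
        },
        task="example_task",                     # hypothetical task name
        datasets=["example_dataset"],            # hypothetical dataset name
        normalize=True,
    )

    # Render the leaderboard for that task as a pandas DataFrame.
    print(server.get_leaderboard("example_task"))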