import json
from pathlib import Path
from typing import Dict, List

import meeteval.io
import pandas as pd

from txt_norm import get_text_norm
from utils import calc_wer, aggregate_wer_metrics


class LeaderboardServer:
    """Manages ASR model submissions and leaderboard generation."""

    def __init__(self,
                 reference_base_path: str = "references",
                 tasks_metadata_path: str = "tasks_metadata.json",
                 local_leaderboard_path: str = "submissions"):
        """Initialize the leaderboard server.

        Args:
            reference_base_path: Base path for reference files
            tasks_metadata_path: Path to tasks metadata JSON file
            local_leaderboard_path: Directory for storing submissions
        """
        self.reference_base_path = Path(reference_base_path).resolve()
        self.tasks_metadata_path = Path(tasks_metadata_path).resolve()
        self.local_leaderboard = Path(local_leaderboard_path).resolve()

        # Load tasks metadata
        self.tasks_metadata = self._load_tasks_metadata()

        # Initialize storage
        self.local_leaderboard.mkdir(exist_ok=True)
        self.text_normalizer = get_text_norm("whisper_nsf")

    def _load_tasks_metadata(self) -> Dict:
        """Load tasks metadata from JSON file."""
        try:
            with open(self.tasks_metadata_path) as f:
                return json.load(f)["tasks"]
        except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
            raise ValueError(f"Failed to load tasks metadata: {e}")

    def _get_results_file_path(self, task: str) -> Path:
        """Get the path to the results file for a specific task."""
        return self.local_leaderboard / f"{task}_results.json"

    def _create_submission_id(self, metadata: Dict[str, str]) -> str:
        """Create a unique submission ID from metadata."""
        return f"{metadata['submitted_by']}_{metadata['model_id']}"

    def _normalize_text_if_needed(self, segment: Dict, normalize: bool) -> Dict:
        """Apply text normalization to a segment if requested."""
        if normalize:
            return {**segment, "words": self.text_normalizer(segment["words"])}
        return segment

    def _evaluate_dataset(self, hyp_seglst, ref_seglst, normalize: bool = False) -> Dict:
        """Evaluate WER for a single dataset."""
        # Apply normalization if requested
        if normalize:
            ref_seglst = ref_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))
            hyp_seglst = hyp_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))

        # Calculate WER metrics
        per_session_wers = calc_wer(
            tcp_hyp_seglst=hyp_seglst,
            ref_seglst=ref_seglst,
            collar=5,
            metrics_list=["tcp_wer"]
        )
        return aggregate_wer_metrics(per_session_wers, ["tcp_wer"])

    def _load_existing_results(self, task: str) -> Dict:
        """Load existing results for a task, or return empty dict."""
        results_path = self._get_results_file_path(task)
        if results_path.exists():
            with open(results_path) as f:
                return json.load(f)
        return {}

    def _save_results(self, task: str, results: Dict) -> None:
        """Save results to the task results file."""
        results_path = self._get_results_file_path(task)
        with open(results_path, "w") as f:
            json.dump(results, f, indent=2)

    def _save_hypothesis_file(self, task: str, submission_id: str, source_file: str) -> None:
        """Save the hypothesis file for future reference."""
        hyp_filename = f"{task}_{submission_id}_hyp.json"
        hyp_filepath = self.local_leaderboard / hyp_filename
        with open(hyp_filepath, "w") as out_f:
            with open(source_file, "r") as in_f:
                out_f.write(in_f.read())

    def prepare_model_for_submission(self, file: str, metadata: Dict[str, str], task: str,
                                     datasets: List[str], normalize: bool = False) -> None:
        """Prepare and evaluate a model submission.
        Args:
            file: Path to the hypothesis file
            metadata: Submission metadata containing 'submitted_by', 'model_id' and 'model_link'
            task: Task name
            datasets: List of dataset names to evaluate on
            normalize: Whether to apply text normalization
        """
        submission_id = self._create_submission_id(metadata)

        # Load hypothesis segments
        hyp_seglst = meeteval.io.load(file)

        # Evaluate on each dataset
        results = {}
        for dataset in datasets:
            ref_path = self.reference_base_path / task / f"{dataset}.json"
            if not ref_path.exists():
                raise FileNotFoundError(f"Reference file not found: {ref_path}")

            ref_seglst = meeteval.io.load(ref_path)
            sessions = ref_seglst.unique('session_id')

            # Filter hypotheses to match reference sessions
            local_hyps = hyp_seglst.filter(lambda seg: seg['session_id'] in sessions)

            if "alimeeting" in dataset or "aishell4" in dataset:
                # Chinese datasets: convert simplified to traditional characters and
                # space-separate characters so WER is computed at character level
                import opencc
                converter = opencc.OpenCC('s2t.json')
                local_hyps = local_hyps.map(
                    lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})
                ref_seglst = ref_seglst.map(
                    lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})

            # Evaluate this dataset
            results[dataset] = self._evaluate_dataset(local_hyps, ref_seglst, normalize)

        # Update results file
        all_results = self._load_existing_results(task)
        all_results[submission_id] = {
            "model_link": metadata["model_link"],
            "model_id": metadata["model_id"],
            "submitted_by": metadata["submitted_by"],
            "results": results
        }
        self._save_results(task, all_results)
        self._save_hypothesis_file(task, submission_id, file)

    @staticmethod
    def make_clickable_model(model_name, link):
        """Render the model name as an HTML link for the leaderboard table."""
        return f'<a target="_blank" href="{link}">{model_name}</a>'

    def get_leaderboard(self, task: str) -> pd.DataFrame:
        """Generate leaderboard DataFrame for a specific task.

        Args:
            task: Task name

        Returns:
            DataFrame containing leaderboard results
        """
        results_path = self._get_results_file_path(task)
        if not results_path.exists():
            return pd.DataFrame(columns=["No submissions yet"])

        with open(results_path) as f:
            results = json.load(f)

        if not results:
            return pd.DataFrame(columns=["No submissions yet"])

        # Build rows for DataFrame
        rows = []
        for content in results.values():
            row = {
                "Model ID": self.make_clickable_model(content["model_id"], content["model_link"]),
                "Submitted by": content["submitted_by"]
            }
            # Add dataset results
            for dataset, metrics in content["results"].items():
                row[dataset] = metrics.get("tcp_wer")
            rows.append(row)

        df = pd.DataFrame(rows)
        if df.empty:
            return df

        # Convert WER to percentage and format
        numeric_columns = df.select_dtypes(include=['number']).columns
        df[numeric_columns] *= 100.0
        df = df.round(2)
        df = df.fillna("-")
        return df
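

# Minimal usage sketch, not part of the server itself. The task name, dataset
# name, metadata values, and file paths below are hypothetical; it assumes the
# hypothesis and reference files are SegLST JSON files that meeteval.io.load
# can read, and that references/<task>/<dataset>.json exists on disk.
if __name__ == "__main__":
    server = LeaderboardServer(
        reference_base_path="references",
        tasks_metadata_path="tasks_metadata.json",
        local_leaderboard_path="submissions",
    )

    # Evaluate one submission on a single dataset of a task and store the results.
    server.prepare_model_for_submission(
        file="my_model_hyp.json",                # hypothetical hypothesis file
        metadata={
            "submitted_by": "example_user",      # hypothetical submitter
            "model_id": "my-asr-model",          # hypothetical model ID
            "model_link": "https://example.com", # hypothetical model page
        },
        task="example_task",                     # hypothetical task name
        datasets=["example_dataset"],            # hypothetical dataset name
        normalize=True,
    )

    # Render the leaderboard for that task as a pandas DataFrame.
    print(server.get_leaderboard("example_task"))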