import json
from pathlib import Path
from typing import Dict, List

import meeteval.io
import pandas as pd

from txt_norm import get_text_norm
from utils import calc_wer, aggregate_wer_metrics


class LeaderboardServer:
    """Manages ASR model submissions and leaderboard generation."""

    def __init__(self,
                 reference_base_path: str = "references",
                 tasks_metadata_path: str = "tasks_metadata.json",
                 local_leaderboard_path: str = "submissions"):
        """Initialize the leaderboard server.

        Args:
            reference_base_path: Base path for reference files
            tasks_metadata_path: Path to tasks metadata JSON file
            local_leaderboard_path: Directory for storing submissions
        """
        self.reference_base_path = Path(reference_base_path).resolve()
        self.tasks_metadata_path = Path(tasks_metadata_path).resolve()
        self.local_leaderboard = Path(local_leaderboard_path).resolve()

        # Load tasks metadata
        self.tasks_metadata = self._load_tasks_metadata()

        # Initialize storage
        self.local_leaderboard.mkdir(exist_ok=True)
        self.text_normalizer = get_text_norm("whisper_nsf")

    def _load_tasks_metadata(self) -> Dict:
        """Load tasks metadata from JSON file."""
        try:
            with open(self.tasks_metadata_path) as f:
                return json.load(f)["tasks"]
        except (FileNotFoundError, KeyError, json.JSONDecodeError) as e:
            raise ValueError(f"Failed to load tasks metadata: {e}")

    def _get_results_file_path(self, task: str) -> Path:
        """Get the path to the results file for a specific task."""
        return self.local_leaderboard / f"{task}_results.json"

    def _create_submission_id(self, metadata: Dict[str, str]) -> str:
        """Create a unique submission ID from metadata."""
        return f"{metadata['submitted_by']}_{metadata['model_id']}"

    def _normalize_text_if_needed(self, segment: Dict, normalize: bool) -> Dict:
        """Apply text normalization to a segment if requested."""
        if normalize:
            return {**segment, "words": self.text_normalizer(segment["words"])}
        return segment

    def _evaluate_dataset(self,
                          hyp_seglst,
                          ref_seglst,
                          normalize: bool = False) -> Dict:
        """Evaluate WER for a single dataset."""
        # Apply normalization if requested
        if normalize:
            ref_seglst = ref_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))
            hyp_seglst = hyp_seglst.map(lambda seg: self._normalize_text_if_needed(seg, True))

        # Calculate WER metrics
        per_session_wers = calc_wer(
            tcp_hyp_seglst=hyp_seglst,
            ref_seglst=ref_seglst,
            collar=5,
            metrics_list=["tcp_wer"]
        )
        return aggregate_wer_metrics(per_session_wers, ["tcp_wer"])

    def _load_existing_results(self, task: str) -> Dict:
        """Load existing results for a task, or return empty dict."""
        results_path = self._get_results_file_path(task)
        if results_path.exists():
            with open(results_path) as f:
                return json.load(f)
        return {}

    def _save_results(self, task: str, results: Dict) -> None:
        """Save results to the task results file."""
        results_path = self._get_results_file_path(task)
        with open(results_path, "w") as f:
            json.dump(results, f, indent=2)

    def _save_hypothesis_file(self,
                              task: str,
                              submission_id: str,
                              source_file: str) -> None:
        """Save the hypothesis file for future reference."""
        hyp_filename = f"{task}_{submission_id}_hyp.json"
        hyp_filepath = self.local_leaderboard / hyp_filename
        with open(hyp_filepath, "w") as out_f:
            with open(source_file, "r") as in_f:
                out_f.write(in_f.read())

    def prepare_model_for_submission(self,
                                     file: str,
                                     metadata: Dict[str, str],
                                     task: str,
                                     datasets: List[str],
                                     normalize: bool = False) -> None:
        """Prepare and evaluate a model submission.

        Args:
            file: Path to the hypothesis file
            metadata: Submission metadata containing 'submitted_by', 'model_id' and 'model_link'
            task: Task name
            datasets: List of dataset names to evaluate on
            normalize: Whether to apply text normalization
        """
        submission_id = self._create_submission_id(metadata)

        # Load hypothesis segments
        hyp_seglst = meeteval.io.load(file)
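        # The hypothesis file is expected to be in meeteval's SegLST format: a JSON
        # list of segment dicts, each roughly of the form (illustrative values only)
        #   {"session_id": "S01", "speaker": "spk0", "start_time": 0.0,
        #    "end_time": 2.5, "words": "hello world"}
        # The tcp-WER scoring below relies on the speaker labels and timestamps.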

        # Evaluate on each dataset
        results = {}
        for dataset in datasets:
            ref_path = self.reference_base_path / task / f"{dataset}.json"
            if not ref_path.exists():
                raise FileNotFoundError(f"Reference file not found: {ref_path}")

            ref_seglst = meeteval.io.load(ref_path)
            sessions = ref_seglst.unique('session_id')

            # Filter hypotheses to match reference sessions
            local_hyps = hyp_seglst.filter(lambda seg: seg['session_id'] in sessions)

            if "alimeeting" in dataset or "aishell4" in dataset:
                # Mandarin datasets: normalize the script with OpenCC and split text into
                # individual characters so tcp-WER effectively becomes a character error rate
                import opencc
                converter = opencc.OpenCC('s2t.json')
                local_hyps = local_hyps.map(
                    lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})
                ref_seglst = ref_seglst.map(
                    lambda seg: {**seg, "words": " ".join(list(converter.convert(seg["words"])))})

            # Evaluate this dataset
            results[dataset] = self._evaluate_dataset(local_hyps, ref_seglst, normalize)

        # Update results file
        all_results = self._load_existing_results(task)
        all_results[submission_id] = {
            "model_link": metadata["model_link"],
            "model_id": metadata["model_id"],
            "submitted_by": metadata["submitted_by"],
            "results": results
        }
        self._save_results(task, all_results)
        self._save_hypothesis_file(task, submission_id, file)

    @staticmethod
    def make_clickable_model(model_name: str, link: str) -> str:
        """Render the model name as an HTML link for the leaderboard table."""
        return (
            f'<a target="_blank" href="{link}" '
            f'style="color: var(--link-text-color); text-decoration: underline;'
            f'text-decoration-style: dotted;">{model_name}</a>'
        )

    def get_leaderboard(self, task: str) -> pd.DataFrame:
        """Generate leaderboard DataFrame for a specific task.

        Args:
            task: Task name

        Returns:
            DataFrame containing leaderboard results
        """
        results_path = self._get_results_file_path(task)
        if not results_path.exists():
            return pd.DataFrame(columns=["No submissions yet"])

        with open(results_path) as f:
            results = json.load(f)

        if not results:
            return pd.DataFrame(columns=["No submissions yet"])

        # Build rows for DataFrame
        rows = []
        for content in results.values():
            row = {
                "Model ID": self.make_clickable_model(content["model_id"], content["model_link"]),
                "Submitted by": content["submitted_by"]
            }
            # Add dataset results
            for dataset, metrics in content["results"].items():
                row[dataset] = metrics.get("tcp_wer")
            rows.append(row)

        df = pd.DataFrame(rows)
        if df.empty:
            return df

        # Convert WER to percentage and format
        numeric_columns = df.select_dtypes(include=['number']).columns
        df[numeric_columns] *= 100.0
        df = df.round(2)
        df = df.fillna("-")
        return df
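

# Minimal usage sketch (illustrative only: the file name, task name, dataset names
# and metadata below are hypothetical and depend on how the Space is configured):
#
#   server = LeaderboardServer()
#   server.prepare_model_for_submission(
#       file="my_model_hyp.json",          # SegLST hypothesis file (hypothetical path)
#       metadata={
#           "submitted_by": "user",
#           "model_id": "my-asr-model",
#           "model_link": "https://huggingface.co/user/my-asr-model",
#       },
#       task="single_channel",             # must match an entry in tasks_metadata.json
#       datasets=["notsofar1", "ami"],     # hypothetical dataset names
#       normalize=True,
#   )
#   print(server.get_leaderboard("single_channel"))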