import glob
import json
import os
from typing import Dict, List

import pandas as pd

# Column layout of the frame built by ``_read_rows``.
_ROW_COLUMNS = ["Model", "Corpus", "Split", "WER", "CER", "Domain", "Evaluated"]

# Metric columns that are averaged and must therefore be numeric.
_METRIC_COLUMNS = ["WER", "CER"]

# Column layout of the frame returned by ``overall_table``.
_OVERALL_COLUMNS = [
    "Model",
    "Avg WER (all)",
    "Avg CER (all)",
    "Avg WER (excl in)",
    "Avg CER (excl in)",
]


def _read_rows(results_dir: str) -> pd.DataFrame:
    """Load every ``*.json`` file in *results_dir* into one flat DataFrame.

    Each file is expected to hold a JSON object whose values are result
    records (dicts). The keys ``model``, ``corpus``, ``split``, ``wer``,
    ``cer``, ``domain`` and ``evaluated_at`` are read from each record;
    missing keys become ``None`` (``""`` for ``domain``).

    Values that are not dicts (e.g. a metadata string stored next to the
    records) are skipped. WER and CER are coerced to numbers; anything that
    cannot be parsed becomes NaN so that later averaging does not fail.

    Returns:
        A DataFrame with the columns listed in ``_ROW_COLUMNS``. It is empty
        (but still has those columns) when no records are found.
    """
    rows = []
    # glob order is filesystem-dependent; sort so the row order (and thus the
    # order of ties in later sorts) is reproducible.
    for path in sorted(glob.glob(os.path.join(results_dir, "*.json"))):
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        for rec in data.values():
            if not isinstance(rec, dict):
                continue
            rows.append(
                {
                    "Model": rec.get("model"),
                    "Corpus": rec.get("corpus"),
                    "Split": rec.get("split"),
                    "WER": rec.get("wer"),
                    "CER": rec.get("cer"),
                    "Domain": rec.get("domain", ""),  # 'in' or 'out' or ''
                    "Evaluated": rec.get("evaluated_at"),
                }
            )

    df = pd.DataFrame(rows, columns=_ROW_COLUMNS)
    for metric in _METRIC_COLUMNS:
        df[metric] = pd.to_numeric(df[metric], errors="coerce")
    return df


def per_dataset_tables(results_dir: str) -> Dict[str, pd.DataFrame]:
    """Return a dict of {corpus_name: DataFrame(Model, WER, CER)}.

    Each table is sorted by ascending WER (rows with a missing WER last) and
    has a fresh 0..n-1 index. Records without a corpus name are left out,
    because ``groupby`` drops null keys. An empty dict is returned when no
    results are found.
    """
    df = _read_rows(results_dir)
    if df.empty:
        return {}

    return {
        corpus: (
            group.sort_values("WER", ascending=True, kind="mergesort")
            .loc[:, ["Model", "WER", "CER"]]
            .reset_index(drop=True)
        )
        for corpus, group in df.groupby("Corpus")
    }


def overall_table(results_dir: str) -> pd.DataFrame:
    """Return a single table with per-model averages.

    Columns:
        - Model
        - Avg WER (all): mean WER over all rows of the model
        - Avg CER (all): mean CER over all rows of the model
        - Avg WER (excl in): mean WER over rows whose Domain is not 'in'
        - Avg CER (excl in): mean CER over rows whose Domain is not 'in'

    Rows are sorted by ascending "Avg WER (all)" and carry a fresh 0..n-1
    index. A model that only has in-domain rows gets NaN in the two
    "excl in" columns. When no results are found, an empty frame with the
    columns above is returned.
    """
    df = _read_rows(results_dir)
    if df.empty:
        return pd.DataFrame(columns=_OVERALL_COLUMNS)

    # Averages over every row of each model.
    agg_all = (
        df.groupby("Model")[_METRIC_COLUMNS]
        .mean()
        .rename(columns={"WER": "Avg WER (all)", "CER": "Avg CER (all)"})
    )

    # Averages over the rows that are not marked in-domain.
    df_out = df[df["Domain"] != "in"]
    if df_out.empty:
        # Keep the columns numeric (NaN) rather than object-typed.
        agg_out = pd.DataFrame(
            index=agg_all.index,
            columns=["Avg WER (excl in)", "Avg CER (excl in)"],
            dtype=float,
        )
    else:
        agg_out = (
            df_out.groupby("Model")[_METRIC_COLUMNS]
            .mean()
            .rename(
                columns={"WER": "Avg WER (excl in)", "CER": "Avg CER (excl in)"}
            )
        )

    # Left join on the model index: every model of agg_all is kept, and models
    # without out-of-domain rows get NaN in the "excl in" columns.
    merged = agg_all.join(agg_out).reset_index()
    merged = merged.sort_values("Avg WER (all)", ascending=True, kind="mergesort")
    return merged.reset_index(drop=True)


def list_corpora(results_dir: str) -> List[str]:
    """Return the sorted, de-duplicated corpus names found in *results_dir*.

    Records without a corpus name are ignored; otherwise sorting would fail
    on comparing ``None`` with strings. Returns an empty list when no
    results are found.
    """
    df = _read_rows(results_dir)
    if df.empty:
        return []
    return sorted(df["Corpus"].dropna().unique().tolist())