Spaces:

meldynamics
/

lt-whisper-leaderboard

Running

File size: 3,235 Bytes

9346f1c
 
4ccccc6
4505a7e
4ccccc6
614ee1f
4ccccc6
1f60a20
4ccccc6
 
 
 
 
4b4926f
4ccccc6
818f024
4ccccc6
 
 
 
818f024
4ccccc6
818f024
4ccccc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3b890b
4ccccc6
 
 
8daa060
4ccccc6
 
 
 
 
 
 
 
8a21df1
 
 
 
 
 
4ccccc6
 
 
 
 
 
 
 
 
 
8daa060
10f9b3c
4ccccc6

import gradio as gr
import pandas as pd
from pathlib import Path

from src.asr.build_dataframe import per_dataset_tables, overall_table, list_corpora

# Results directory, resolved relative to this module's location rather than
# the current working directory.
RESULTS_DIR = Path(__file__).parent.joinpath("asr_data", "results")

# Custom stylesheet passed to gr.Blocks(css=...); the "#df" selector targets
# the components created with elem_id="df".
CSS = """
#df * { white-space: nowrap; }       /* keep single-line model names */
.gradio-container { max-width: 1200px !important; }
th, td { font-size: 14px; }
"""

def get_overall_df() -> pd.DataFrame:
    """Return the overall leaderboard table, ready for display.

    Calls ``overall_table`` on ``RESULTS_DIR`` and replaces missing values
    with empty strings.

    Returns:
        The filled table on success. If anything raises, a one-row frame
        with a single ``"Error"`` column holding ``"<ExceptionType>: <message>"``
        is returned instead, so the failure is shown in the UI rather than
        propagating.
    """
    try:
        table = overall_table(str(RESULTS_DIR)).fillna("")
    except Exception as exc:
        # Deliberate catch-all: report the failure inside the table itself.
        message = f"{type(exc).__name__}: {exc}"
        table = pd.DataFrame({"Error": [message]})
    return table

def get_per_corpus_df(corpus: str) -> pd.DataFrame:
    """Return the results table for one corpus, ready for display.

    Args:
        corpus: Key looked up in the object returned by
            ``per_dataset_tables`` (presumably a dict keyed by corpus
            name -- verify against ``src.asr.build_dataframe``).

    Returns:
        The corpus table with missing values replaced by empty strings, or
        an empty frame when ``corpus`` is not among the keys. If anything
        raises, a one-row frame with a single ``"Error"`` column holding
        ``"<ExceptionType>: <message>"`` is returned instead.
    """
    try:
        all_tables = per_dataset_tables(str(RESULTS_DIR))
        fallback = pd.DataFrame()
        selected = all_tables.get(corpus, fallback)
        result = selected.fillna("")
    except Exception as exc:
        # Deliberate catch-all: report the failure inside the table itself.
        message = f"{type(exc).__name__}: {exc}"
        result = pd.DataFrame({"Error": [message]})
    return result

# Build the Gradio UI: a title, an "Overall" tab, one tab per corpus, and a
# static Markdown footer. `demo` is the app object launched at the bottom of
# the file.
with gr.Blocks(css=CSS) as demo:
    gr.Markdown("# 🇱🇹 Lithuanian ASR Leaderboard (Local Results)")

    with gr.Tab("Overall"):
        # The initial value is computed once, while the UI is being built.
        # NOTE(review): elem_id="df" is reused by every Dataframe below, so
        # the "#df" rule in CSS applies to all of them but the ids are not
        # unique within the page.
        df_overall = gr.Dataframe(
            value=get_overall_df(),
            interactive=False,
            elem_id="df",
            label="Overall (averages)",
            wrap=False,
        )
        # Clicking re-runs the loader and writes its result into the table.
        gr.Button("Reload overall").click(lambda: get_overall_df(), outputs=df_overall)

    # One tab per dataset. The corpus list is read once here, so the Reload
    # buttons refresh table contents only; a corpus that appears later gets
    # no tab until the app is rebuilt.
    # NOTE(review): unlike the table loaders, this call is not wrapped in
    # try/except, so an exception from list_corpora propagates at module
    # load time.
    corpora = list_corpora(str(RESULTS_DIR))
    for corpus in corpora:
        with gr.Tab(corpus.upper()):
            df_c = gr.Dataframe(
                value=get_per_corpus_df(corpus),
                interactive=False,
                elem_id="df",
                label=f"{corpus.upper()} (WER, CER)",
                wrap=False,
            )
            # `c=corpus` binds the current loop value as a default argument so
            # each button keeps its own corpus (avoids the late-binding
            # closure pitfall); `df_c` is passed by value to `outputs` for the
            # same reason.
            gr.Button(f"Reload {corpus}").click(lambda c=corpus: get_per_corpus_df(c), outputs=df_c)

    # Static footer: evaluation hardware, model links, dataset links, and a
    # usage tip. The text is rendered as Markdown, so it is kept flush-left
    # inside the string.
    gr.Markdown(
        """
### Evaluation
- GPU: 1 × NVIDIA GeForce RTX 4090

### 📚 Models
- [DrishtiSharma/whisper-large-v2-lithuanian](https://huggingface.co/DrishtiSharma/whisper-large-v2-lithuanian)
  - [meldynamics/whisper-large-v2-lithuanian-ggml](https://huggingface.co/meldynamics/whisper-large-v2-lithuanian-ggml)
- [sam8000/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered](https://huggingface.co/sam8000/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered)
  - [meldynamics/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered-ggml](https://huggingface.co/meldynamics/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered-ggml)
- [Aismantas/whisper-base-lithuanian](https://huggingface.co/Aismantas/whisper-base-lithuanian)
  - [meldynamics/whisper-base-lithuanian-ggml](https://huggingface.co/meldynamics/whisper-base-lithuanian-ggml)

### 🗂 Datasets
- [Lithuania 0.06 CER Filtered](https://huggingface.co/datasets/sam8000/lithuania)
- [Liepa-2](https://huggingface.co/datasets/isLucid/liepa-2)
- [Common Voice 13.0 (Lithuanian subset)](https://huggingface.co/datasets/mozilla-foundation/common_voice_13_0)
- [Common Voice 17.0 (Lithuanian subset)](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0)

---
*Tip:* add new JSON files to `asr_data/results/` and click **Reload** to refresh the tables.
        """
    )

# Start the Gradio server only when this file is run as a script; importing
# the module builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()