File size: 3,235 Bytes
9346f1c
 
4ccccc6
4505a7e
4ccccc6
614ee1f
4ccccc6
1f60a20
4ccccc6
 
 
 
 
4b4926f
4ccccc6
818f024
4ccccc6
 
 
 
818f024
4ccccc6
818f024
4ccccc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3b890b
4ccccc6
 
 
8daa060
4ccccc6
 
 
 
 
 
 
 
8a21df1
 
 
 
 
 
4ccccc6
 
 
 
 
 
 
 
 
 
8daa060
10f9b3c
4ccccc6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import gradio as gr
import pandas as pd
from pathlib import Path

from src.asr.build_dataframe import per_dataset_tables, overall_table, list_corpora

# Folder with the evaluation result files, located next to this module
# under asr_data/results.
RESULTS_DIR = Path(__file__).parent.joinpath("asr_data", "results")

# Stylesheet handed to gr.Blocks: the `#df` rule targets the tables created
# with elem_id="df"; the other rules cap the page width and set the table
# cell font size.
CSS = """
#df * { white-space: nowrap; }       /* keep single-line model names */
.gradio-container { max-width: 1200px !important; }
th, td { font-size: 14px; }
"""

def get_overall_df() -> pd.DataFrame:
    """Return the overall leaderboard table, ready for display.

    The table comes from ``overall_table`` called on ``RESULTS_DIR``; missing
    values are replaced by empty strings. If anything raises, a one-row frame
    with a single ``Error`` column ("<ExceptionType>: <message>") is returned
    instead, so the failure is shown in the UI rather than propagated.
    """
    try:
        return overall_table(str(RESULTS_DIR)).fillna("")
    except Exception as exc:
        message = f"{type(exc).__name__}: {exc}"
        return pd.DataFrame({"Error": [message]})

def get_per_corpus_df(corpus: str) -> pd.DataFrame:
    """Return the table for one corpus, ready for display.

    Looks up ``corpus`` in the mapping produced by ``per_dataset_tables`` for
    ``RESULTS_DIR`` (falling back to an empty frame when the key is absent)
    and replaces missing values with empty strings. If anything raises, a
    one-row frame with a single ``Error`` column
    ("<ExceptionType>: <message>") is returned instead.
    """
    try:
        by_corpus = per_dataset_tables(str(RESULTS_DIR))
        selected = by_corpus.get(corpus, pd.DataFrame())
        return selected.fillna("")
    except Exception as exc:
        message = f"{type(exc).__name__}: {exc}"
        return pd.DataFrame({"Error": [message]})

# Build the leaderboard UI: an "Overall" tab, one tab per corpus found in
# RESULTS_DIR, and a static Markdown section. Each table is filled once at
# build time and can be refreshed with the reload button in its tab.
with gr.Blocks(css=CSS) as demo:
    gr.Markdown("# 🇱🇹 Lithuanian ASR Leaderboard (Local Results)")

    with gr.Tab("Overall"):
        df_overall = gr.Dataframe(
            value=get_overall_df(),
            interactive=False,
            elem_id="df",
            label="Overall (averages)",
            wrap=False,
        )
        # The getter takes no arguments, so it can be used as the callback directly.
        gr.Button("Reload overall").click(get_overall_df, outputs=df_overall)

    # One tab per dataset. The corpus list is read once, when the UI is built.
    # NOTE(review): every table shares elem_id="df" so the `#df` CSS rule hits
    # all of them; duplicate element ids are not valid HTML — consider a class.
    corpora = list_corpora(str(RESULTS_DIR))
    for corpus in corpora:
        with gr.Tab(corpus.upper()):
            df_c = gr.Dataframe(
                value=get_per_corpus_df(corpus),
                interactive=False,
                elem_id="df",
                label=f"{corpus.upper()} (WER, CER)",
                wrap=False,
            )
            # Bind the current corpus as a default argument so each button
            # reloads its own table (avoids the late-binding closure pitfall).
            gr.Button(f"Reload {corpus}").click(lambda c=corpus: get_per_corpus_df(c), outputs=df_c)

    gr.Markdown(
        """
### Evaluation
- GPU: 1 × NVIDIA GeForce RTX 4090

### 📚 Models
- [DrishtiSharma/whisper-large-v2-lithuanian](https://huggingface.co/DrishtiSharma/whisper-large-v2-lithuanian)
  - [meldynamics/whisper-large-v2-lithuanian-ggml](https://huggingface.co/meldynamics/whisper-large-v2-lithuanian-ggml)
- [sam8000/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered](https://huggingface.co/sam8000/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered)
  - [meldynamics/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered-ggml](https://huggingface.co/meldynamics/whisper-large-v3-turbo-lithuanian-lithuania-0.06-cer-filtered-ggml)
- [Aismantas/whisper-base-lithuanian](https://huggingface.co/Aismantas/whisper-base-lithuanian)
  - [meldynamics/whisper-base-lithuanian-ggml](https://huggingface.co/meldynamics/whisper-base-lithuanian-ggml)

### 🗂 Datasets
- [Lithuania 0.06 CER Filtered](https://huggingface.co/datasets/sam8000/lithuania)
- [Liepa-2](https://huggingface.co/datasets/isLucid/liepa-2)
- [Common Voice 13.0 (Lithuanian subset)](https://huggingface.co/datasets/mozilla-foundation/common_voice_13_0)
- [Common Voice 17.0 (Lithuanian subset)](https://huggingface.co/datasets/mozilla-foundation/common_voice_17_0)

---
*Tip:* add new JSON files to `asr_data/results/` and click **Reload** to refresh the tables.
        """
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()