File size: 5,000 Bytes
d1950ed
85bf837
7c6e259
d1950ed
fd53fad
9803b15
 
d1950ed
13ea032
 
d1950ed
6f9f32c
 
 
 
d1950ed
 
 
2e4b9cc
9803b15
86c7e4c
9803b15
86c7e4c
13ea032
86c7e4c
9803b15
 
13ea032
9803b15
 
6f26b02
 
 
13ea032
9803b15
 
 
 
6f26b02
 
 
 
 
 
 
 
 
 
 
9803b15
13ea032
6f26b02
13ea032
 
 
 
 
9803b15
 
 
 
 
13ea032
 
 
 
 
9803b15
 
 
 
13ea032
9803b15
 
6f26b02
9803b15
 
 
 
13ea032
6f26b02
 
 
6f9f32c
85bf837
 
 
6f9f32c
13ea032
9803b15
6f26b02
ebf4fcc
13ea032
9803b15
 
 
 
 
13ea032
 
 
 
 
9803b15
 
 
 
 
 
 
 
6f9f32c
13ea032
 
 
 
 
 
 
86c7e4c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import gradio as gr
from datasets import load_dataset

# --- Configuration ---
# Hugging Face Hub dataset repository browsed by this viewer.
DATASET_NAME = "Cnam-LMSSC/vibravox-test"
# Dataset configurations selectable in the first dropdown.
SUBSETS = ["speech_clean", "speech_noisy", "speechless_clean", "speechless_noisy"]
# Dataset splits selectable in the second dropdown.
SPLITS = ["train", "validation", "test"]
# Column names read from each dataset row. The text columns are only present
# in the "speech_*" subsets; the code checks for TEXT_COLUMN before using them.
TEXT_COLUMN = "raw_text"
PHONEMIZED_TEXT_COLUMN = "phonemized_text"
GENDER_COLUMN = "gender"
# The six sensor channels, in the same order as the six gr.Audio players
# built in the UI section below.
AUDIO_COLUMNS = [
    "audio.headset_microphone",
    "audio.throat_microphone",
    "audio.soft_in_ear_microphone",
    "audio.rigid_in_ear_microphone",
    "audio.forehead_accelerometer",
    "audio.temple_vibration_pickup"
]

# --- Main Application Logic ---

def load_and_update_all(subset, split):
    """Load a dataset configuration/split and refresh the whole UI.

    Parameters
    ----------
    subset : str
        Dataset configuration name (one of ``SUBSETS``).
    split : str
        Split name (one of ``SPLITS``).

    Returns
    -------
    tuple
        Values in the order of ``all_outputs``: the dataset (stored in
        ``loaded_dataset_state``), a slider update, updates for the three
        text fields, the six ``(sampling_rate, array)`` audio tuples, and
        an update for the error box.
    """
    try:
        dataset = load_dataset(DATASET_NAME, name=subset, split=split)
        # "speechless_*" subsets have no transcription columns; hide those
        # text fields instead of showing empty boxes.
        has_text_fields = TEXT_COLUMN in dataset.features

        # Display the first row immediately after loading.
        sample = dataset[0]
        sentence = sample.get(TEXT_COLUMN)
        phonemized_text = sample.get(PHONEMIZED_TEXT_COLUMN)
        gender = sample.get(GENDER_COLUMN)

        raw_audio_data = [
            (sample[col]['sampling_rate'], sample[col]['array']) for col in AUDIO_COLUMNS
        ]

        # The row slider is only meaningful when there is more than one row.
        dataset_len = len(dataset)
        if dataset_len <= 1:
            slider_update = gr.update(visible=False)
        else:
            slider_update = gr.update(maximum=dataset_len - 1, value=0, visible=True, interactive=True)

        return (
            dataset,
            slider_update,
            gr.update(value=sentence, visible=has_text_fields),
            gr.update(value=phonemized_text, visible=has_text_fields),
            gr.update(value=gender, visible=has_text_fields),
            *raw_audio_data,
            gr.update(value="", visible=False)  # hide any previous error
        )
    except Exception as e:
        # Show the failure in the error box and clear every other output.
        error_message = f"Failed to load {subset}/{split}. Error: {e}"
        # BUGFIX: gr.Audio accepts (sample_rate, data) or None; the previous
        # (None, None) tuple made Gradio try to postprocess a sample rate of
        # None and crash instead of simply clearing the players.
        return (
            None,
            gr.update(visible=False),
            gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
            *[None] * len(AUDIO_COLUMNS),
            gr.update(value=error_message, visible=True)
        )

def get_audio_row(dataset, index):
    """Fetch one row from the loaded dataset when the slider moves.

    Parameters
    ----------
    dataset : datasets.Dataset or None
        The dataset held in ``loaded_dataset_state`` (None if loading failed).
    index : float
        Slider value; coerced to int before indexing.

    Returns
    -------
    list
        Values in the order of ``data_outputs``: the three text fields
        followed by the six ``(sampling_rate, array)`` audio tuples.
    """
    if dataset is None:
        # No dataset loaded (e.g. the previous load failed): clear all outputs.
        return [None] * (3 + len(AUDIO_COLUMNS))

    # Gradio sliders deliver floats; dataset indexing needs an int.
    sample = dataset[int(index)]

    # NOTE: the original also computed `TEXT_COLUMN in dataset.features` here
    # but never used it — removed. .get() already yields None when a text
    # column is absent (speechless subsets).
    texts = [
        sample.get(TEXT_COLUMN),
        sample.get(PHONEMIZED_TEXT_COLUMN),
        sample.get(GENDER_COLUMN),
    ]
    raw_audio_data = [
        (sample[col]['sampling_rate'], sample[col]['array']) for col in AUDIO_COLUMNS
    ]
    return texts + raw_audio_data

# --- Build the Gradio Interface ---
with gr.Blocks(css="footer {display: none !important}") as demo:
    gr.Markdown("# Vibravox Viewer")

    # Holds the currently loaded dataset between events.
    loaded_dataset_state = gr.State(None)

    with gr.Row():
        subset_dropdown = gr.Dropdown(SUBSETS, value="speech_clean", label="Select Subset")
        split_dropdown = gr.Dropdown(SPLITS, value="train", label="Select Split")

    error_box = gr.Textbox(visible=False, interactive=False, container=False)

    with gr.Row():
        sentence_output = gr.Textbox(label="Raw Text", interactive=False)
        phonemized_output = gr.Textbox(label="Phonemized Text", interactive=False)
        gender_output = gr.Textbox(label="Gender", interactive=False)

    slider = gr.Slider(label="Select Data Row")

    # Six audio players, three per row, in the same order as AUDIO_COLUMNS.
    audio_labels = [
        "Headset Microphone",
        "Laryngophone (Throat Mic)",
        "Soft In-Ear Microphone",
        "Rigid In-Ear Microphone",
        "Forehead Accelerometer",
        "Temple Vibration Pickup",
    ]
    audio_players = []
    for start in (0, 3):
        with gr.Row():
            for label in audio_labels[start:start + 3]:
                audio_players.append(gr.Audio(label=label))

    # Full-refresh outputs vs. row-only outputs (slider moves).
    all_outputs = [loaded_dataset_state, slider, sentence_output, phonemized_output,
                   gender_output, *audio_players, error_box]
    data_outputs = [sentence_output, phonemized_output, gender_output, *audio_players]

    # Initial load and any subset/split change rebuild everything; moving the
    # slider only refreshes the row-dependent outputs.
    demo.load(fn=load_and_update_all, inputs=[subset_dropdown, split_dropdown], outputs=all_outputs)
    subset_dropdown.change(fn=load_and_update_all, inputs=[subset_dropdown, split_dropdown], outputs=all_outputs)
    split_dropdown.change(fn=load_and_update_all, inputs=[subset_dropdown, split_dropdown], outputs=all_outputs)
    slider.change(fn=get_audio_row, inputs=[loaded_dataset_state, slider], outputs=data_outputs)

demo.launch()