Upload app.py
app.py ADDED
@@ -0,0 +1,185 @@
import gradio as gr
from datasets import load_dataset, Dataset, Audio, concatenate_datasets
import json
import os
from datetime import datetime
import shutil

# Directory to save recordings
AUDIO_DIR = "data/audios"
SAMPLING_RATE = 16000
os.makedirs(AUDIO_DIR, exist_ok=True)

# State variables
state = {
    "sentences": [],
    "recordings": {},  # Dictionary to store recordings by ID
    "index": 0,  # Index for navigating through sentences
    "idx": 0,  # Index for sentences (IDs)
    "json_loaded": False
}
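
# The recorder works in two modes: when a sentences JSON has been uploaded
# ("json_loaded" is True), "index" walks through the loaded sentences; otherwise
# sentences are typed in freely and each new one gets an auto-generated id "s_<idx>".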

def load_json(file):
    with open(file.name, "r", encoding="utf-8") as f:
        content = json.load(f)
    state["sentences"].extend(content)
    state["recordings"].update({k["id"]: [] for k in content})
    state["json_loaded"] = True
    return update_display()

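# Expected shape of the uploaded JSON (inferred from load_json above; the field
# names "id" and "text" are what the rest of the app reads), for example:
# [
#   {"id": "s_0", "text": "The quick brown fox jumps over the lazy dog."},
#   {"id": "s_1", "text": "She sells seashells by the seashore."}
# ]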

def update_display():
    if not state["sentences"]:
        return "No data loaded.", None, "", "", gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)

    idx = state["index"]
    progress = ""
    if state["json_loaded"]:
        if idx >= len(state["sentences"]):
            export_json()
            return "✅ All sentences recorded!\n💾 Data exported to JSON", None, "", "", gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)

        progress = 0
        for recordings in state["recordings"].values():
            if len(recordings) > 0:
                progress += 1
        progress = f"{progress} / {len(state['sentences'])} recorded"

    # Enable/disable navigation buttons based on the current index
    next_btn_enabled = gr.update(visible=not (state["index"] == len(state["sentences"]) - 1))
    prev_btn_enabled = gr.update(visible=not (state["index"] == 0))

    recordings = []
    text = ""
    current_id = f"s_{state['idx']}"
    if idx < len(state["sentences"]):
        current = state["sentences"][idx]
        current_id = current["id"]
        text = current["text"]
        recordings = state["recordings"].get(current["id"], [])

    if recordings:
        # Get the most recent recording for that sentence ID
        current_recording = recordings[-1]
        current_audio = current_recording["audio"]
        audio_visibility = gr.update(visible=True)
    else:
        current_audio = None
        audio_visibility = gr.update(visible=False)

    return text, None, f"ID: {current_id}", progress, gr.update(visible=True), prev_btn_enabled, next_btn_enabled, current_audio, audio_visibility

def record_audio(audio, text):
    if state["sentences"] and state["index"] >= len(state["sentences"]):
        return update_display()

    if audio is None:
        gr.Warning("The audio is empty, please provide a valid recording")
        return update_display()

    if state["json_loaded"]:
        state["sentences"][state["index"]]["text"] = text  # overwrite with the current written value
    else:
        state["sentences"].append({"id": f"s_{state['idx']}", "text": text})
        state["idx"] += 1

    sentence = state["sentences"][state["index"]]
    uid = sentence["id"]

    filename = f"{uid}_{datetime.now().strftime('%Y%m%d%H%M%S')}.wav"
    filepath = os.path.join(AUDIO_DIR, filename)

    shutil.copy(audio, filepath)

    # Add the new recording under the correct ID in the recordings dictionary
    uid_versioning = uid
    recordings = state["recordings"].get(uid, [])
    if recordings:
        uid_versioning = f"{uid}_v{len(recordings)}"

    state["recordings"].setdefault(uid, []).append({
        "id": uid_versioning,
        "text": sentence["text"],
        "audio": filepath
    })
    state["index"] += 1
    return update_display()

def export_json():
    output_path = "data/tts_dataset.json"
    data = [record for records in state["recordings"].values() for record in records]
    if data:
        with open(output_path, "w") as f:
            json.dump(data, f, indent=2)
    else:
        gr.Warning("There is no recorded data")
    return output_path

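# Each exported metadata entry mirrors what record_audio stores, e.g.
# {"id": "s_0", "text": "The quick brown fox jumps over the lazy dog.",
#  "audio": "data/audios/s_0_20240101120000.wav"}
# (the example id, text, and timestamp are illustrative only).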

def go_previous():
    if state["index"] > 0:
        state["index"] -= 1
    return update_display()

def go_next():
    if state["index"] < len(state["sentences"]) - 1:
        state["index"] += 1
    return update_display()

def push_to_hub(hub_id, is_new_dataset, sampling_rate):
    if hub_id:
        # Flatten recordings
        recordings = []
        for element in state["recordings"].values():
            for version in element:
                recordings.append({"id": version["id"], "audio": version["audio"], "text": version["text"]})

        dataset = Dataset.from_list(recordings)
        dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))
        if not is_new_dataset:
            previous_dataset = load_dataset(hub_id, split="train")
            dataset = concatenate_datasets([previous_dataset, dataset])
        dataset.push_to_hub(hub_id)
        gr.Info("Successfully synced with the Hub")
    else:
        gr.Warning("The hub_id field is empty, please provide a relevant hub id.")
    return update_display()
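
# Note: pushing to the Hub assumes the environment is already authenticated with a
# Hugging Face token (e.g. via `huggingface-cli login` or an HF_TOKEN secret on the
# Space); this is an assumption about the deployment, not enforced by the code above.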

with gr.Blocks() as demo:
    gr.Markdown("# 🗣️ TTS Dataset Recorder")

    with gr.Row():
        json_file = gr.File(label="Upload Sentences JSON", file_types=[".json"])
        with gr.Column():
            export_btn = gr.Button("💾 Export Metadata")
            with gr.Row():
                hub_id = gr.Textbox(label="Hub id", interactive=True)
            with gr.Row():
                is_new_dataset = gr.Checkbox(label="New dataset", interactive=True)
                sampling_rate = gr.Number(label="Sampling rate", value=SAMPLING_RATE, precision=0)
            push_to_hub_btn = gr.Button("🤗 Sync to HuggingFace")

    id_display = gr.Textbox(label="ID", interactive=False)
    progress_text = gr.Textbox(label="Progress", interactive=False)
    sentence_text = gr.Textbox(label="Sentence", interactive=True)
    audio_input = gr.Audio(type="filepath", label="Record your voice", interactive=True)
    record_btn = gr.Button("✅ Submit Recording")

    with gr.Row():
        prev_btn = gr.Button("⬅️ Previous")
        next_btn = gr.Button("➡️ Next")

    # audio_player = gr.Audio(label="Play Recorded Audio", interactive=False)
    audio_player = gr.Audio(label="Play Recorded Audio", type="filepath")

    json_file.change(load_json, inputs=json_file, outputs=[sentence_text, audio_input, id_display, progress_text, record_btn, prev_btn, next_btn, audio_player, audio_player])
    record_btn.click(record_audio, inputs=[audio_input, sentence_text], outputs=[sentence_text, audio_input, id_display, progress_text, record_btn, prev_btn, next_btn, audio_player, audio_player])
    export_btn.click(export_json, outputs=gr.File())

    prev_btn.click(go_previous, outputs=[sentence_text, audio_input, id_display, progress_text, record_btn, prev_btn, next_btn, audio_player, audio_player])
    next_btn.click(go_next, outputs=[sentence_text, audio_input, id_display, progress_text, record_btn, prev_btn, next_btn, audio_player, audio_player])

    push_to_hub_btn.click(push_to_hub, inputs=[hub_id, is_new_dataset, sampling_rate], outputs=[sentence_text, audio_input, id_display, progress_text, record_btn, prev_btn, next_btn, audio_player, audio_player])

demo.launch()