ylankgz committed
Commit 0488cfb · 1 Parent(s): a30344c

Add examples table
Files changed (3)
  1. app.py +28 -92
  2. requirements.txt +1 -1
  3. util.py +3 -3
app.py CHANGED
@@ -39,14 +39,12 @@ token_ = os.getenv('HF_TOKEN')
 
 # Model configurations
 models_configs = {
-    'Base_pretrained_model': Config(),
-    'Female_voice': Config(
+    'base': Config(),
+    'female': Config(
         model_name='nineninesix/lfm-nano-codec-expresso-ex02-v.0.2',
-        temperature=0.2
     ),
-    'Male_voice': Config(
+    'male': Config(
         model_name='nineninesix/lfm-nano-codec-expresso-ex01-v.0.1',
-        temperature=0.2
     )
 }
 
@@ -61,31 +59,11 @@ for model_name, config in models_configs.items():
 print("All models loaded!")
 
 
-
-# def initialize_models():
-#     """Initialize models globally to avoid reloading"""
-#     global models
-
-#     # if player is None:
-#     #     print("Initializing NeMo Audio Player...")
-#     #     player = NemoAudioPlayer(Config())
-#     #     print("NeMo Audio Player initialized!")
-
-#     if not models:
-#         print("Loading TTS models...")
-#         for model_name, config in models_configs.items():
-#             print(f"Loading {model_name}...")
-#             models[model_name] = KaniModel(config, player, token_)
-#             print(f"{model_name} loaded!")
-#         print("All models loaded!")
-
 @spaces.GPU
 def generate_speech_gpu(text, model_choice):
     """
     Generate speech from text using the selected model on GPU
     """
-    # Initialize models if not already done
-    # initialize_models()
 
     if not text.strip():
         return None, "Please enter text for speech generation."
@@ -114,16 +92,8 @@ def generate_speech_gpu(text, model_choice):
         print(f"Error during generation: {str(e)}")
         return None, f"❌ Error during generation: {str(e)}"
 
-# def validate_input(text, model_choice):
-#     """Quick validation without GPU"""
-#     if not text.strip():
-#         return "⚠️ Please enter text for speech generation."
-#     if not model_choice:
-#         return "⚠️ Please select a model."
-#     return f"✅ Ready to generate with {model_choice}"
-
 # Create Gradio interface
-with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as demo:
+with gr.Blocks(title="😻 KaniTTS - Text to Speech", theme=gr.themes.Default()) as demo:
     gr.Markdown("# KaniTTS: Fast and Expressive Speech Generation Model")
     gr.Markdown("Select a model and enter text to generate high-quality speech")
 
@@ -137,20 +107,18 @@ with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as d
             )
 
             text_input = gr.Textbox(
-                label="Enter Text",
-                placeholder="Enter text for speech generation...",
+                label="Text",
+                placeholder="Enter your text ...",
                 lines=3,
                 max_lines=10
            )
 
             generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
-
-            # Quick validation button (CPU only)
-            # validate_btn = gr.Button("🔍 Validate Input", variant="secondary")
+
 
         with gr.Column(scale=1):
             audio_output = gr.Audio(
-                label="Generated Speech",
+                label="Generated Audio",
                 type="numpy"
             )
 
@@ -168,64 +136,32 @@ with gr.Blocks(title="KaniTTS - Text to Speech", theme=gr.themes.Default()) as d
         outputs=[audio_output, time_report_output]
     )
 
-    # Demo Examples
-    gr.Markdown("## 🎯 Demo Examples")
+    gr.Markdown("## Examples")
 
     def play_demo(text):
         return (22050, demo_examples[text]), 'DEMO'
 
     with gr.Row():
-        for text in list(demo_examples.keys())[:4]:
-            gr.Button(text).click(lambda t=text: play_demo(t), outputs=[audio_output, time_report_output])
-
-    with gr.Row():
-        for text in list(demo_examples.keys())[4:8]:
-            gr.Button(text).click(lambda t=text: play_demo(t), outputs=[audio_output, time_report_output])
 
-
-    # # CPU validation event
-    # validate_btn.click(
-    #     fn=validate_input,
-    #     inputs=[text_input, model_dropdown],
-    #     outputs=status_text
-    # )
-
-    # # Update status on input change
-    # text_input.change(
-    #     fn=validate_input,
-    #     inputs=[text_input, model_dropdown],
-    #     outputs=status_text
-    # )
-
-    # Text examples
-    # gr.Markdown("### 📝 Text Examples:")
-    # examples = [
-    #     "Hello! How are you today?",
-    #     "Welcome to the world of artificial intelligence.",
-    #     "This is a demonstration of neural text-to-speech synthesis.",
-    #     "Zero GPU makes high-quality speech generation accessible to everyone!"
-    # ]
-
-    # gr.Examples(
-    #     examples=examples,
-    #     inputs=text_input,
-    #     label="Click on an example to use it"
-    # )
-
-    # # Information section
-    # with gr.Accordion("ℹ️ Model Information", open=False):
-    #     gr.Markdown("""
-    #     **Available Models:**
-    #     - **Base Model**: Default pre-trained model for general use
-    #     - **Female Voice**: Optimized for female voice characteristics
-    #     - **Male Voice**: Optimized for male voice characteristics
-
-    #     **Features:**
-    #     - Powered by NVIDIA NeMo Toolkit
-    #     - High-quality 22kHz audio output
-    #     - Zero GPU acceleration for fast inference
-    #     - Support for long text sequences
-    #     """)
+        examples = [
+            ["Anyway, um, so, um, tell me, tell me all about her. I mean, what's she like? Is she really, you know, pretty?", "male"],
+            ["No, that does not make you a failure. No, sweetie, no. It just, uh, it just means that you're having a tough time...", "male"],
+            ["I-- Oh, I am such an idiot sometimes. I'm so sorry. Um, I-I don't know where my head's at.", "male"],
+            ["Got it. $300,000. I can definitely help you get a very good price for your property by selecting a realtor.", "female"],
+            ["Holy fu- Oh my God! Don't you understand how dangerous it is, huh?", "male"],
+            ["You make my days brighter, and my wildest dreams feel like reality. How do you do that?", "female"],
+            ["Great, and just a couple quick questions so we can match you with the right buyer. Is your home address still 330 East Charleston Road?", "female"],
+            ["Oh, yeah. I mean did you want to get a quick snack together or maybe something before you go?", "female"],
+        ]
+
+
+        gr.Examples(
+            examples=examples,
+            inputs=[text_input, model_dropdown],
+            fn=lambda text, model_choice: play_demo(text),
+            outputs=[audio_output, time_report_output],
+            cache_examples=True,
+        )
 
 if __name__ == "__main__":
     demo.launch(
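
Note: the diff above replaces the per-sentence demo buttons with a cached gr.Examples table. The snippet below is a minimal, standalone sketch of that pattern under assumed names (tts_fn, text_box, voice_dd are placeholders, not the Space's actual code); with cache_examples=True, Gradio runs fn once per example row at startup and replays the cached outputs when a row is clicked.

import numpy as np
import gradio as gr

# Placeholder generation function: returns (sample_rate, waveform) for a numpy
# gr.Audio component plus a short report string; a real Space would run TTS here.
def tts_fn(text, voice):
    audio = np.zeros(22050, dtype=np.float32)  # one second of silence as a stand-in
    return (22050, audio), f"voice={voice}, chars={len(text)}"

with gr.Blocks() as sketch:
    text_box = gr.Textbox(label="Text")
    voice_dd = gr.Dropdown(choices=["base", "female", "male"], value="base", label="Model")
    audio_out = gr.Audio(type="numpy", label="Generated Audio")
    report_out = gr.Textbox(label="Report")

    # Each examples row matches the order of `inputs`; cached results fill `outputs`.
    gr.Examples(
        examples=[["Hello there.", "female"], ["Good morning.", "male"]],
        inputs=[text_box, voice_dd],
        fn=tts_fn,
        outputs=[audio_out, report_out],
        cache_examples=True,
    )

if __name__ == "__main__":
    sketch.launch()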
requirements.txt CHANGED
@@ -1,5 +1,5 @@
 torch==2.8.0
 librosa==0.11.0
-nemo_toolkit[all]==2.4.0
+nemo_toolkit[tts]==2.4.0
 numpy==1.26.4
 gradio>=4.0.0
util.py CHANGED
@@ -197,7 +197,7 @@ class KaniModel:
         model_request = point_2 - point_1
         player_time = point_3 - point_2
         total_time = point_3 - point_1
-        report = f"MODEL GENERATION: {model_request:.2f}\nNANO CODEC: {player_time:.2f}\nTOTAL: {total_time:.2f}"
+        report = f"SPEECH TOKENS: {model_request:.2f}\n CODEC: {player_time:.2f}\nTOTAL: {total_time:.2f}"
         return report
 
     def run_model(self, text: str):
@@ -256,9 +256,9 @@ class Demo:
         return arr
 
     def __call__(self):
-        examples = {}
+        examples = []
         for idx, (sentence, url) in enumerate(zip(self.sentences, self.urls), start=1):
             filename = f"{idx}.wav"
             filepath = self.download_audio(url, filename)
-            examples[sentence] = self.get_audio(filepath)
+            examples.append([sentence, self.get_audio(filepath)])
         return examples
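
For context on the Demo.__call__ change above: the return shape moves from a dict keyed by sentence (looked up by the old demo buttons) to a list of [sentence, audio] rows, the row format an examples table consumes. A minimal sketch with placeholder sentences and silent waveforms:

import numpy as np

sentences = ["First placeholder sentence.", "Second placeholder sentence."]
waveforms = [np.zeros(22050, dtype=np.float32) for _ in sentences]  # stand-in audio

# Old shape: {sentence: waveform}, looked up by text on button click.
examples_dict = dict(zip(sentences, waveforms))

# New shape: [[sentence, waveform], ...], one row per example for the examples table.
examples_rows = [[s, w] for s, w in zip(sentences, waveforms)]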