zach committed on
Commit
1a6c67a
·
1 Parent(s): e91a94a

Simplify integration logic in app.py

Browse files
Files changed (3) hide show
  1. src/app.py +15 -50
  2. src/integrations/hume_api.py +1 -2
  3. src/utils.py +8 -19
src/app.py CHANGED
@@ -30,9 +30,9 @@ from src.integrations import (
30
  text_to_speech_with_hume,
31
  )
32
  from src.utils import (
33
- choose_providers,
34
  create_shuffled_tts_options,
35
  determine_selected_option,
 
36
  submit_voting_results,
37
  validate_character_description_length,
38
  validate_text_length,
@@ -124,68 +124,31 @@ class App:
124
  logger.warning(f"Validation error: {ve}")
125
  raise gr.Error(str(ve))
126
 
127
- # Select 2 TTS providers based on whether the text has been modified.
128
  text_modified = text != generated_text_state
129
- provider_a, provider_b = choose_providers(text_modified)
 
130
 
131
  try:
132
- start_time = time.time()
133
  logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
 
134
 
135
- if provider_b == constants.HUME_AI:
136
- # If generating 2 Hume outputs, do so in a single API call to reduce overhead
137
- logger.info("Using single Hume API call for both audio outputs")
138
- num_generations = 2
139
- result = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
140
 
141
- # Enforce that 4 values are returned.
142
- if not (isinstance(result, tuple) and len(result) == 4):
143
- raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
144
 
145
- generation_id_a, audio_a, generation_id_b, audio_b = result
146
- logger.info(f"Completed dual Hume synthesis in {time.time() - start_time:.2f} seconds")
147
- else:
148
- # Process API calls sequentially to avoid resource contention
149
- logger.info(f"Sequential processing: First generating audio with {provider_a}")
150
 
151
- # Generate a single Hume output
152
- num_generations = 1
153
- result_a = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
154
-
155
- if not isinstance(result_a, tuple) or len(result_a) != 2:
156
- raise ValueError("Expected 2 values from Hume TTS call when generating 1 output")
157
-
158
- generation_id_a, audio_a = result_a[0], result_a[1]
159
- logger.info(f"First audio generated in {time.time() - start_time:.2f} seconds")
160
-
161
- # Generate a second TTS output from the second provider
162
- logger.info(f"Now generating audio with {provider_b}")
163
- second_start = time.time()
164
-
165
- match provider_b:
166
- case constants.ELEVENLABS:
167
- result_b = await text_to_speech_with_elevenlabs(character_description, text, self.config)
168
- case _:
169
- # Additional TTS Providers can be added here.
170
- raise ValueError(f"Unsupported provider: {provider_b}")
171
-
172
- generation_id_b, audio_b = result_b[0], result_b[1]
173
-
174
- logger.info(f"Second audio generated in {time.time() - second_start:.2f} seconds")
175
- logger.info(f"Total synthesis time: {time.time() - start_time:.2f} seconds")
176
-
177
-
178
- # Shuffle options so that placement of options in the UI will always be random.
179
  option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
180
  option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
181
  options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
182
 
183
- option_a_audio = options_map["option_a"]["audio_file_path"]
184
- option_b_audio = options_map["option_b"]["audio_file_path"]
185
-
186
  return (
187
- gr.update(value=option_a_audio, visible=True, autoplay=True),
188
- gr.update(value=option_b_audio, visible=True),
189
  options_map,
190
  text_modified,
191
  text,
@@ -194,9 +157,11 @@ class App:
194
  except ElevenLabsError as ee:
195
  logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
196
  raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
 
197
  except HumeError as he:
198
  logger.error(f"HumeError while synthesizing speech from text: {he!s}")
199
  raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
 
200
  except Exception as e:
201
  logger.error(f"Unexpected error during TTS generation: {e}")
202
  raise gr.Error("An unexpected error occurred. Please try again later.")
 
30
  text_to_speech_with_hume,
31
  )
32
  from src.utils import (
 
33
  create_shuffled_tts_options,
34
  determine_selected_option,
35
+ get_random_provider,
36
  submit_voting_results,
37
  validate_character_description_length,
38
  validate_text_length,
 
124
  logger.warning(f"Validation error: {ve}")
125
  raise gr.Error(str(ve))
126
 
 
127
  text_modified = text != generated_text_state
128
+ provider_a = constants.HUME_AI # always compare with Hume
129
+ provider_b = get_random_provider(text_modified)
130
 
131
  try:
 
132
  logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
133
+ generation_id_a, audio_a = await text_to_speech_with_hume(character_description, text, self.config)
134
 
135
+ tts_provider_funcs = {
136
+ constants.HUME_AI: text_to_speech_with_hume,
137
+ constants.ELEVENLABS: text_to_speech_with_elevenlabs,
138
+ }
 
139
 
140
+ if provider_b not in tts_provider_funcs:
141
+ raise ValueError(f"Unsupported provider: {provider_b}")
 
142
 
143
+ generation_id_b, audio_b = await tts_provider_funcs[provider_b](character_description, text, self.config)
 
 
 
 
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
146
  option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
147
  options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
148
 
 
 
 
149
  return (
150
+ gr.update(value=options_map["option_a"]["audio_file_path"], visible=True, autoplay=True),
151
+ gr.update(value=options_map["option_b"]["audio_file_path"], visible=True),
152
  options_map,
153
  text_modified,
154
  text,
 
157
  except ElevenLabsError as ee:
158
  logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
159
  raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
160
+
161
  except HumeError as he:
162
  logger.error(f"HumeError while synthesizing speech from text: {he!s}")
163
  raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
164
+
165
  except Exception as e:
166
  logger.error(f"Unexpected error during TTS generation: {e}")
167
  raise gr.Error("An unexpected error occurred. Please try again later.")
src/integrations/hume_api.py CHANGED
@@ -20,8 +20,7 @@ from typing import Tuple, Union
20
  # Third-Party Library Imports
21
  from hume import AsyncHumeClient
22
  from hume.core.api_error import ApiError
23
- from hume.tts import PostedUtterance
24
- from hume.tts.types import Format, FormatMp3, ReturnTts
25
  from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
26
 
27
  # Local Application Imports
 
20
  # Third-Party Library Imports
21
  from hume import AsyncHumeClient
22
  from hume.core.api_error import ApiError
23
+ from hume.tts.types import Format, FormatMp3, PostedUtterance, ReturnTts
 
24
  from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
25
 
26
  # Local Application Imports
src/utils.py CHANGED
@@ -203,33 +203,22 @@ def save_base64_audio_to_file(base64_audio: str, filename: str, config: Config)
203
  return str(relative_path)
204
 
205
 
206
- def choose_providers(text_modified: bool) -> Tuple[TTSProviderName, TTSProviderName]:
207
  """
208
- Select two TTS providers based on whether the text has been modified.
209
-
210
- The first provider is always set to "Hume AI". For the second provider:
211
- - If the text has been modified or no character description is provided, it will be "Hume AI"
212
- - Otherwise, it will be "Hume AI" 30% of the time and "ElevenLabs" 70% of the time
213
 
214
  Args:
215
  text_modified (bool): A flag indicating whether the text has been modified.
216
 
217
  Returns:
218
- Tuple[TTSProviderName, TTSProviderName]: A tuple containing two TTS provider names,
219
- where the first is always "Hume AI" and the second is determined by the conditions
220
- and probability distribution described above.
221
  """
 
 
222
 
223
- hume_comparison_only = text_modified
224
-
225
- provider_a = constants.HUME_AI
226
-
227
- if hume_comparison_only:
228
- provider_b = constants.HUME_AI
229
- else:
230
- provider_b = constants.HUME_AI if random.random() < 0.3 else constants.ELEVENLABS
231
-
232
- return provider_a, provider_b
233
 
234
 
235
  def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap:
 
203
  return str(relative_path)
204
 
205
 
206
+ def get_random_provider(text_modified: bool) -> TTSProviderName:
207
  """
208
+ Select a TTS provider based on whether the text has been modified.
 
 
 
 
209
 
210
  Args:
211
  text_modified (bool): A flag indicating whether the text has been modified.
212
 
213
  Returns:
214
+ provider: A TTS provider selected based on the following criteria:
215
+ - If the text has been modified, it will be "Hume AI"
216
+ - Otherwise, it will be "Hume AI" 30% of the time and "ElevenLabs" 70% of the time
217
  """
218
+ if text_modified:
219
+ return constants.HUME_AI
220
 
221
+ return constants.HUME_AI if random.random() < 0.3 else constants.ELEVENLABS
 
 
 
 
 
 
 
 
 
222
 
223
 
224
  def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap: