Spaces:
Running
Running
zach
committed on
Commit
·
1a6c67a
1
Parent(s):
e91a94a
Simplify integration logic in app.py
Browse files
- src/app.py +15 -50
- src/integrations/hume_api.py +1 -2
- src/utils.py +8 -19
src/app.py
CHANGED
@@ -30,9 +30,9 @@ from src.integrations import (
|
|
30 |
text_to_speech_with_hume,
|
31 |
)
|
32 |
from src.utils import (
|
33 |
-
choose_providers,
|
34 |
create_shuffled_tts_options,
|
35 |
determine_selected_option,
|
|
|
36 |
submit_voting_results,
|
37 |
validate_character_description_length,
|
38 |
validate_text_length,
|
@@ -124,68 +124,31 @@ class App:
|
|
124 |
logger.warning(f"Validation error: {ve}")
|
125 |
raise gr.Error(str(ve))
|
126 |
|
127 |
-
# Select 2 TTS providers based on whether the text has been modified.
|
128 |
text_modified = text != generated_text_state
|
129 |
-
provider_a
|
|
|
130 |
|
131 |
try:
|
132 |
-
start_time = time.time()
|
133 |
logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
|
|
|
134 |
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
result = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
|
140 |
|
141 |
-
|
142 |
-
|
143 |
-
raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
|
144 |
|
145 |
-
|
146 |
-
logger.info(f"Completed dual Hume synthesis in {time.time() - start_time:.2f} seconds")
|
147 |
-
else:
|
148 |
-
# Process API calls sequentially to avoid resource contention
|
149 |
-
logger.info(f"Sequential processing: First generating audio with {provider_a}")
|
150 |
|
151 |
-
# Generate a single Hume output
|
152 |
-
num_generations = 1
|
153 |
-
result_a = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
|
154 |
-
|
155 |
-
if not isinstance(result_a, tuple) or len(result_a) != 2:
|
156 |
-
raise ValueError("Expected 2 values from Hume TTS call when generating 1 output")
|
157 |
-
|
158 |
-
generation_id_a, audio_a = result_a[0], result_a[1]
|
159 |
-
logger.info(f"First audio generated in {time.time() - start_time:.2f} seconds")
|
160 |
-
|
161 |
-
# Generate a second TTS output from the second provider
|
162 |
-
logger.info(f"Now generating audio with {provider_b}")
|
163 |
-
second_start = time.time()
|
164 |
-
|
165 |
-
match provider_b:
|
166 |
-
case constants.ELEVENLABS:
|
167 |
-
result_b = await text_to_speech_with_elevenlabs(character_description, text, self.config)
|
168 |
-
case _:
|
169 |
-
# Additional TTS Providers can be added here.
|
170 |
-
raise ValueError(f"Unsupported provider: {provider_b}")
|
171 |
-
|
172 |
-
generation_id_b, audio_b = result_b[0], result_b[1]
|
173 |
-
|
174 |
-
logger.info(f"Second audio generated in {time.time() - second_start:.2f} seconds")
|
175 |
-
logger.info(f"Total synthesis time: {time.time() - start_time:.2f} seconds")
|
176 |
-
|
177 |
-
|
178 |
-
# Shuffle options so that placement of options in the UI will always be random.
|
179 |
option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
|
180 |
option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
|
181 |
options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
|
182 |
|
183 |
-
option_a_audio = options_map["option_a"]["audio_file_path"]
|
184 |
-
option_b_audio = options_map["option_b"]["audio_file_path"]
|
185 |
-
|
186 |
return (
|
187 |
-
gr.update(value=
|
188 |
-
gr.update(value=
|
189 |
options_map,
|
190 |
text_modified,
|
191 |
text,
|
@@ -194,9 +157,11 @@ class App:
|
|
194 |
except ElevenLabsError as ee:
|
195 |
logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
|
196 |
raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
|
|
|
197 |
except HumeError as he:
|
198 |
logger.error(f"HumeError while synthesizing speech from text: {he!s}")
|
199 |
raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
|
|
|
200 |
except Exception as e:
|
201 |
logger.error(f"Unexpected error during TTS generation: {e}")
|
202 |
raise gr.Error("An unexpected error occurred. Please try again later.")
|
|
|
30 |
text_to_speech_with_hume,
|
31 |
)
|
32 |
from src.utils import (
|
|
|
33 |
create_shuffled_tts_options,
|
34 |
determine_selected_option,
|
35 |
+
get_random_provider,
|
36 |
submit_voting_results,
|
37 |
validate_character_description_length,
|
38 |
validate_text_length,
|
|
|
124 |
logger.warning(f"Validation error: {ve}")
|
125 |
raise gr.Error(str(ve))
|
126 |
|
|
|
127 |
text_modified = text != generated_text_state
|
128 |
+
provider_a = constants.HUME_AI # always compare with Hume
|
129 |
+
provider_b = get_random_provider(text_modified)
|
130 |
|
131 |
try:
|
|
|
132 |
logger.info(f"Starting speech synthesis with providers: {provider_a} and {provider_b}")
|
133 |
+
generation_id_a, audio_a = await text_to_speech_with_hume(character_description, text, self.config)
|
134 |
|
135 |
+
tts_provider_funcs = {
|
136 |
+
constants.HUME_AI: text_to_speech_with_hume,
|
137 |
+
constants.ELEVENLABS: text_to_speech_with_elevenlabs,
|
138 |
+
}
|
|
|
139 |
|
140 |
+
if provider_b not in tts_provider_funcs:
|
141 |
+
raise ValueError(f"Unsupported provider: {provider_b}")
|
|
|
142 |
|
143 |
+
generation_id_b, audio_b = await tts_provider_funcs[provider_b](character_description, text, self.config)
|
|
|
|
|
|
|
|
|
144 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
|
146 |
option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
|
147 |
options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
|
148 |
|
|
|
|
|
|
|
149 |
return (
|
150 |
+
gr.update(value=options_map["option_a"]["audio_file_path"], visible=True, autoplay=True),
|
151 |
+
gr.update(value=options_map["option_b"]["audio_file_path"], visible=True),
|
152 |
options_map,
|
153 |
text_modified,
|
154 |
text,
|
|
|
157 |
except ElevenLabsError as ee:
|
158 |
logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
|
159 |
raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
|
160 |
+
|
161 |
except HumeError as he:
|
162 |
logger.error(f"HumeError while synthesizing speech from text: {he!s}")
|
163 |
raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
|
164 |
+
|
165 |
except Exception as e:
|
166 |
logger.error(f"Unexpected error during TTS generation: {e}")
|
167 |
raise gr.Error("An unexpected error occurred. Please try again later.")
|
src/integrations/hume_api.py
CHANGED
@@ -20,8 +20,7 @@ from typing import Tuple, Union
|
|
20 |
# Third-Party Library Imports
|
21 |
from hume import AsyncHumeClient
|
22 |
from hume.core.api_error import ApiError
|
23 |
-
from hume.tts import PostedUtterance
|
24 |
-
from hume.tts.types import Format, FormatMp3, ReturnTts
|
25 |
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
26 |
|
27 |
# Local Application Imports
|
|
|
20 |
# Third-Party Library Imports
|
21 |
from hume import AsyncHumeClient
|
22 |
from hume.core.api_error import ApiError
|
23 |
+
from hume.tts.types import Format, FormatMp3, PostedUtterance, ReturnTts
|
|
|
24 |
from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_exponential
|
25 |
|
26 |
# Local Application Imports
|
src/utils.py
CHANGED
@@ -203,33 +203,22 @@ def save_base64_audio_to_file(base64_audio: str, filename: str, config: Config)
|
|
203 |
return str(relative_path)
|
204 |
|
205 |
|
206 |
-
def
|
207 |
"""
|
208 |
-
Select
|
209 |
-
|
210 |
-
The first provider is always set to "Hume AI". For the second provider:
|
211 |
-
- If the text has been modified or no character description is provided, it will be "Hume AI"
|
212 |
-
- Otherwise, it will be "Hume AI" 30% of the time and "ElevenLabs" 70% of the time
|
213 |
|
214 |
Args:
|
215 |
text_modified (bool): A flag indicating whether the text has been modified.
|
216 |
|
217 |
Returns:
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
"""
|
|
|
|
|
222 |
|
223 |
-
|
224 |
-
|
225 |
-
provider_a = constants.HUME_AI
|
226 |
-
|
227 |
-
if hume_comparison_only:
|
228 |
-
provider_b = constants.HUME_AI
|
229 |
-
else:
|
230 |
-
provider_b = constants.HUME_AI if random.random() < 0.3 else constants.ELEVENLABS
|
231 |
-
|
232 |
-
return provider_a, provider_b
|
233 |
|
234 |
|
235 |
def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap:
|
|
|
203 |
return str(relative_path)
|
204 |
|
205 |
|
206 |
+
def get_random_provider(text_modified: bool) -> TTSProviderName:
|
207 |
"""
|
208 |
+
Select a TTS provider based on whether the text has been modified.
|
|
|
|
|
|
|
|
|
209 |
|
210 |
Args:
|
211 |
text_modified (bool): A flag indicating whether the text has been modified.
|
212 |
|
213 |
Returns:
|
214 |
+
provider: A TTS provider selected based on the following criteria:
|
215 |
+
- If the text has been modified, it will be "Hume AI"
|
216 |
+
- Otherwise, it will be "Hume AI" 30% of the time and "ElevenLabs" 70% of the time
|
217 |
"""
|
218 |
+
if text_modified:
|
219 |
+
return constants.HUME_AI
|
220 |
|
221 |
+
return constants.HUME_AI if random.random() < 0.3 else constants.ELEVENLABS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
|
223 |
|
224 |
def create_shuffled_tts_options(option_a: Option, option_b: Option) -> OptionMap:
|