twitchard committed on
Commit
1ed6720
·
unverified ·
1 Parent(s): cb57d96

more explicit control flow

Browse files
README.md CHANGED
@@ -76,12 +76,12 @@ Expressive TTS Arena/
76
 
77
  Standard
78
  ```sh
79
- uv run python -m src.app
80
  ```
81
 
82
  With hot-reloading
83
  ```sh
84
- uv run watchfiles "python -m src.app" src
85
  ```
86
 
87
  4. Test the application by navigating to the localhost URL in your browser (e.g. `localhost:7860` or `http://127.0.0.1:7860`)
 
76
 
77
  Standard
78
  ```sh
79
+ uv run python -m src.main
80
  ```
81
 
82
  With hot-reloading
83
  ```sh
84
+ uv run watchfiles "python -m src.main" src
85
  ```
86
 
87
  4. Test the application by navigating to the localhost URL in your browser (e.g. `localhost:7860` or `http://127.0.0.1:7860`)
pyproject.toml CHANGED
@@ -19,6 +19,7 @@ dependencies = [
19
  dev-dependencies = [
20
  "mypy>=1.15.0",
21
  "pre-commit>=4.1.0",
 
22
  "pytest>=8.3.4",
23
  "ruff>=0.9.5",
24
  "watchfiles>=1.0.4",
@@ -79,4 +80,4 @@ select = [
79
  max-line-length = 120
80
 
81
  [tool.ruff.lint.pydocstyle]
82
- convention = "google"
 
19
  dev-dependencies = [
20
  "mypy>=1.15.0",
21
  "pre-commit>=4.1.0",
22
+ "pyright>=1.1.394",
23
  "pytest>=8.3.4",
24
  "ruff>=0.9.5",
25
  "watchfiles>=1.0.4",
 
80
  max-line-length = 120
81
 
82
  [tool.ruff.lint.pydocstyle]
83
+ convention = "google"
src/app.py CHANGED
@@ -18,8 +18,9 @@ import gradio as gr
18
 
19
  # Local Application Imports
20
  from src import constants
21
- from src.config import AUDIO_DIR, logger
22
  from src.custom_types import ComparisonType, Option, OptionMap
 
23
  from src.integrations import (
24
  AnthropicError,
25
  ElevenLabsError,
@@ -38,510 +39,499 @@ from src.utils import (
38
  )
39
 
40
 
41
- def generate_text(
42
- character_description: str,
43
- ) -> Tuple[Union[str, gr.update], gr.update]:
44
- """
45
- Validates the character_description and generates text using Anthropic API.
46
-
47
- Args:
48
- character_description (str): The user-provided text for character description.
49
-
50
- Returns:
51
- Tuple containing:
52
- - The generated text (as a gr.update).
53
- - An update for the generated text state.
54
-
55
- Raises:
56
- gr.Error: On validation or API errors.
57
- """
58
- try:
59
- validate_character_description_length(character_description)
60
- except ValueError as ve:
61
- logger.warning(f"Validation error: {ve}")
62
- raise gr.Error(str(ve))
63
-
64
- try:
65
- generated_text = generate_text_with_claude(character_description)
66
- logger.info(f"Generated text ({len(generated_text)} characters).")
67
- return gr.update(value=generated_text), generated_text
68
- except AnthropicError as ae:
69
- logger.error(f"AnthropicError while generating text: {ae!s}")
70
- raise gr.Error(
71
- f'There was an issue communicating with the Anthropic API: "{ae.message}"'
72
- )
73
- except Exception as e:
74
- logger.error(f"Unexpected error while generating text: {e}")
75
- raise gr.Error("Failed to generate text. Please try again later.")
76
-
77
-
78
- def synthesize_speech(
79
- character_description: str,
80
- text: str,
81
- generated_text_state: str,
82
- ) -> Tuple[gr.update, gr.update, dict, str, ComparisonType, str, str, bool, str, str]:
83
- """
84
- Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
85
-
86
- This function generates TTS outputs using different providers based on the input text and its modification
87
- state. Depending on the selected providers, it may:
88
- - Synthesize one Hume and one ElevenLabs output (50% chance), or
89
- - Synthesize two Hume outputs (50% chance).
90
-
91
- The outputs are processed and shuffled, and the corresponding UI components for two audio players are updated.
92
- Additional metadata such as the generation IDs, comparison type, and state information are also returned.
93
-
94
- Args:
95
- character_description (str): The description of the character used for generating the voice.
96
- text (str): The text content to be synthesized into speech.
97
- generated_text_state (str): The previously generated text state, used to determine if the text has
98
- been modified.
99
-
100
- Returns:
101
- Tuple containing:
102
- - gr.update: Update for the first audio player (with autoplay enabled).
103
- - gr.update: Update for the second audio player.
104
- - dict: A mapping of option constants to their corresponding TTS providers.
105
- - str: The raw audio value (relative file path) for option B.
106
- - ComparisonType: The comparison type between the selected TTS providers.
107
- - str: Generation ID for option A.
108
- - str: Generation ID for option B.
109
- - bool: Flag indicating whether the text was modified.
110
- - str: The original text that was synthesized.
111
- - str: The original character description.
112
-
113
- Raises:
114
- gr.Error: If any API or unexpected errors occur during the TTS synthesis process.
115
- """
116
- if not text:
117
- logger.warning("Skipping text-to-speech due to empty text.")
118
- raise gr.Error("Please generate or enter text to synthesize.")
119
-
120
- # Select 2 TTS providers based on whether the text has been modified.
121
- text_modified = text != generated_text_state
122
- provider_a, provider_b = choose_providers(
123
- text_modified, character_description
124
- )
125
-
126
- try:
127
- if provider_b == constants.HUME_AI:
128
- # If generating 2 Hume outputs, do so in a single API call
129
- (
130
- generation_id_a,
131
- audio_a,
132
- generation_id_b,
133
- audio_b,
134
- ) = text_to_speech_with_hume(character_description, text, 2)
135
- else:
136
- with ThreadPoolExecutor(max_workers=2) as executor:
137
- # Generate a single Hume output
138
- future_audio_a = executor.submit(
139
- text_to_speech_with_hume, character_description, text
140
- )
141
- # Generate a second TTS output from the second provider
142
- match provider_b:
143
- case constants.ELEVENLABS:
144
- future_audio_b = executor.submit(
145
- text_to_speech_with_elevenlabs, character_description, text
146
- )
147
- case _:
148
- # Additional TTS Providers can be added here
149
- raise ValueError(f"Unsupported provider: {provider_b}")
150
-
151
- generation_id_a, audio_a = future_audio_a.result()
152
- generation_id_b, audio_b = future_audio_b.result()
153
-
154
- # Shuffle options so that placement of options in the UI will always be random
155
- option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
156
- option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
157
- options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
158
-
159
- option_a_audio = options_map["option_a"]["audio_file_path"]
160
- option_b_audio = options_map["option_b"]["audio_file_path"]
161
-
162
- return (
163
- gr.update(value=option_a_audio, visible=True, autoplay=True),
164
- gr.update(value=option_b_audio, visible=True),
165
- options_map,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  text_modified,
167
- text,
168
  character_description,
 
 
 
169
  )
170
- except ElevenLabsError as ee:
171
- logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
172
- raise gr.Error(
173
- f'There was an issue communicating with the Elevenlabs API: "{ee.message}"'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  )
175
- except HumeError as he:
176
- logger.error(f"HumeError while synthesizing speech from text: {he!s}")
177
- raise gr.Error(
178
- f'There was an issue communicating with the Hume API: "{he.message}"'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  )
180
- except Exception as e:
181
- logger.error(f"Unexpected error during TTS generation: {e}")
182
- raise gr.Error("An unexpected error ocurred. Please try again later.")
183
-
184
-
185
- def vote(
186
- vote_submitted: bool,
187
- option_map: OptionMap,
188
- clicked_option_button: str,
189
- text_modified: bool,
190
- character_description: str,
191
- text: str,
192
- ) -> Tuple[bool, gr.update, gr.update, gr.update]:
193
- """
194
- Handles user voting.
195
-
196
- Args:
197
- vote_submitted (bool): True if a vote was already submitted.
198
- option_map (OptionMap): A dictionary mapping option labels to their details.
199
- Expected structure:
200
- {
201
- 'Option A': 'Hume AI',
202
- 'Option B': 'ElevenLabs',
203
- }
204
- selected_button (str): The button that was clicked.
205
-
206
- Returns:
207
- A tuple of:
208
- - A boolean indicating if the vote was accepted.
209
- - An update for the selected vote button (showing provider and trophy emoji).
210
- - An update for the unselected vote button (showing provider).
211
- - An update for enabling vote interactions.
212
- """
213
- if not option_map or vote_submitted:
214
- return gr.skip(), gr.skip(), gr.skip(), gr.skip()
215
-
216
- selected_option, other_option = determine_selected_option(clicked_option_button)
217
- selected_provider = option_map[selected_option]["provider"]
218
- other_provider = option_map[other_option]["provider"]
219
-
220
- # Report voting results to be persisted to results DB
221
- submit_voting_results(
222
- option_map,
223
- selected_option,
224
- text_modified,
225
- character_description,
226
- text,
227
- )
228
-
229
- # Build button text, displaying the provider and voice name, appending the trophy emoji to the selected option.
230
- selected_label = f"{selected_provider} {constants.TROPHY_EMOJI}"
231
- other_label = f"{other_provider}"
232
-
233
- return (
234
- True,
235
- (
236
- gr.update(value=selected_label, variant="primary")
237
- if selected_option == constants.OPTION_A_KEY
238
- else gr.update(value=other_label, variant="secondary")
239
- ),
240
- (
241
- gr.update(value=other_label, variant="secondary")
242
- if selected_option == constants.OPTION_A_KEY
243
- else gr.update(value=selected_label, variant="primary")
244
- ),
245
- gr.update(interactive=True),
246
- )
247
-
248
-
249
- def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, bool]:
250
- """
251
- Resets UI state before generating new text.
252
-
253
- Returns:
254
- A tuple of updates for:
255
- - option_a_audio_player (clear audio)
256
- - option_b_audio_player (clear audio)
257
- - vote_button_a (disable and reset button text)
258
- - vote_button_a (disable and reset button text)
259
- - option_map_state (reset option map state)
260
- - vote_submitted_state (reset submitted vote state)
261
- """
262
- return (
263
- gr.update(value=None),
264
- gr.update(value=None, autoplay=False),
265
- gr.update(value=constants.SELECT_OPTION_A, variant="secondary"),
266
- gr.update(value=constants.SELECT_OPTION_B, variant="secondary"),
267
- None,
268
- False,
269
- )
270
-
271
-
272
- def build_input_section() -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
273
- """
274
  Builds the input section including the sample character description dropdown, character
275
  description input, and generate text button.
276
- """
277
- sample_character_description_dropdown = gr.Dropdown(
278
- choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
279
- label="Choose a sample character description",
280
- value=None,
281
- interactive=True,
282
- )
283
- character_description_input = gr.Textbox(
284
- label="Character Description",
285
- placeholder="Enter a character description...",
286
- lines=3,
287
- max_lines=8,
288
- max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
289
- show_copy_button=True,
290
- )
291
- generate_text_button = gr.Button("Generate Text", variant="secondary")
292
- return (
293
- sample_character_description_dropdown,
294
- character_description_input,
295
- generate_text_button,
296
- )
297
-
298
-
299
- def build_output_section() -> (
300
- Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]
301
- ):
302
- """
303
- Builds the output section including text input, audio players, and vote buttons.
304
- """
305
- text_input = gr.Textbox(
306
- label="Input Text",
307
- placeholder="Enter or generate text for synthesis...",
308
- interactive=True,
309
- autoscroll=False,
310
- lines=3,
311
- max_lines=8,
312
- max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
313
- show_copy_button=True,
314
- )
315
- synthesize_speech_button = gr.Button("Synthesize Speech", variant="primary")
316
- with gr.Row(equal_height=True):
317
- option_a_audio_player = gr.Audio(
318
- label=constants.OPTION_A_LABEL, type="filepath", interactive=False
319
- )
320
- option_b_audio_player = gr.Audio(
321
- label=constants.OPTION_B_LABEL, type="filepath", interactive=False
322
  )
323
- with gr.Row(equal_height=True):
324
- vote_button_a = gr.Button(constants.SELECT_OPTION_A, interactive=False)
325
- vote_button_b = gr.Button(constants.SELECT_OPTION_B, interactive=False)
326
- return (
327
- text_input,
328
- synthesize_speech_button,
329
- option_a_audio_player,
330
- option_b_audio_player,
331
- vote_button_a,
332
- vote_button_b,
333
- )
334
-
335
-
336
- def build_gradio_interface() -> gr.Blocks:
337
- """
338
- Builds and configures the Gradio user interface.
339
-
340
- Returns:
341
- gr.Blocks: The fully constructed Gradio UI layout.
342
- """
343
- custom_theme = CustomTheme()
344
- with gr.Blocks(
345
- title="Expressive TTS Arena",
346
- theme=custom_theme,
347
- fill_width=True,
348
- css_paths="src/assets/styles.css",
349
- ) as demo:
350
- # Title & instructions
351
- gr.Markdown("# Expressive TTS Arena")
352
- gr.Markdown(
353
- """
354
- 1. **Choose or enter a character description**: Select a sample from the list or enter your own to guide
355
- text and voice generation.
356
- 2. **Generate text**: Click **"Generate Text"** to create dialogue based on the character. The generated
357
- text will appear in the input field automatically—edit it if needed.
358
- 3. **Synthesize speech**: Click **"Synthesize Speech"** to send your text and character description to two
359
- TTS APIs. Each API generates a voice and synthesizes speech in that voice.
360
- 4. **Listen & compare**: Play both audio options and assess their expressiveness.
361
- 5. **Vote for the best**: Click **"Select Option A"** or **"Select Option B"** to choose the most
362
- expressive output.
363
- """
364
  )
365
-
366
- # Build generate text section
367
- (
368
  sample_character_description_dropdown,
369
  character_description_input,
370
  generate_text_button,
371
- ) = build_input_section()
372
 
373
- # Build synthesize speech section
374
- (
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  text_input,
376
  synthesize_speech_button,
377
  option_a_audio_player,
378
  option_b_audio_player,
379
  vote_button_a,
380
  vote_button_b,
381
- ) = build_output_section()
382
-
383
- # --- UI state components ---
384
-
385
- # Track character description used for text and voice generation
386
- character_description_state = gr.State("")
387
- # Track text used for speech synthesis
388
- text_state = gr.State("")
389
- # Track generated text state
390
- generated_text_state = gr.State("")
391
- # Track whether text that was used was generated or modified/custom
392
- text_modified_state = gr.State()
393
-
394
- # Track option map (option A and option B are randomized)
395
- option_map_state = gr.State()
396
-
397
- # Track whether the user has voted for an option
398
- vote_submitted_state = gr.State(False)
399
-
400
- # --- Register event handlers ---
401
-
402
- # When a sample character description is chosen, update the character description textbox
403
- sample_character_description_dropdown.change(
404
- fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
405
- inputs=[sample_character_description_dropdown],
406
- outputs=[character_description_input],
407
  )
408
 
409
- # Generate text button click handler chain:
410
- # 1. Disable the "Generate text" button
411
- # 2. Generate text
412
- # 3. Enable the "Generate text" button
413
- generate_text_button.click(
414
- fn=lambda: gr.update(interactive=False),
415
- inputs=[],
416
- outputs=[generate_text_button],
417
- ).then(
418
- fn=generate_text,
419
- inputs=[character_description_input],
420
- outputs=[text_input, generated_text_state],
421
- ).then(
422
- fn=lambda: gr.update(interactive=True),
423
- inputs=[],
424
- outputs=[generate_text_button],
425
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
426
 
427
- # Synthesize speech button click event handler chain:
428
- # 1. Disable the "Synthesize speech" button
429
- # 2. Reset UI state
430
- # 3. Synthesize speech, load audio players, and display vote button
431
- # 4. Enable the "Synthesize speech" button and display vote buttons
432
- synthesize_speech_button.click(
433
- fn=lambda: (
434
- gr.update(interactive=False),
435
- gr.update(interactive=False),
436
- gr.update(interactive=False),
437
- ),
438
- inputs=[],
439
- outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
440
- ).then(
441
- fn=reset_ui,
442
- inputs=[],
443
- outputs=[
444
- option_a_audio_player,
445
- option_b_audio_player,
446
- vote_button_a,
447
- vote_button_b,
448
- option_map_state,
449
- vote_submitted_state,
450
- ],
451
- ).then(
452
- fn=synthesize_speech,
453
- inputs=[character_description_input, text_input, generated_text_state],
454
- outputs=[
455
  option_a_audio_player,
456
  option_b_audio_player,
457
- option_map_state,
458
- text_modified_state,
459
- text_state,
460
- character_description_state,
461
- ],
462
- ).then(
463
- fn=lambda: (
464
- gr.update(interactive=True),
465
- gr.update(interactive=True),
466
- gr.update(interactive=True),
467
- ),
468
- inputs=[],
469
- outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
470
- )
471
-
472
- # Vote button click event handlers
473
- vote_button_a.click(
474
- fn=lambda: (
475
- gr.update(interactive=False),
476
- gr.update(interactive=False),
477
- ),
478
- inputs=[],
479
- outputs=[vote_button_a, vote_button_b],
480
- ).then(
481
- fn=vote,
482
- inputs=[
483
- vote_submitted_state,
484
- option_map_state,
485
- vote_button_a,
486
- text_modified_state,
487
- character_description_state,
488
- text_state,
489
- ],
490
- outputs=[
491
- vote_submitted_state,
492
- vote_button_a,
493
- vote_button_b,
494
- synthesize_speech_button,
495
- ],
496
- )
497
-
498
- vote_button_b.click(
499
- fn=lambda: (
500
- gr.update(interactive=False),
501
- gr.update(interactive=False),
502
- ),
503
- inputs=[],
504
- outputs=[vote_button_a, vote_button_b],
505
- ).then(
506
- fn=vote,
507
- inputs=[
508
- vote_submitted_state,
509
- option_map_state,
510
- vote_button_b,
511
- text_modified_state,
512
- character_description_state,
513
- text_state,
514
- ],
515
- outputs=[
516
- vote_submitted_state,
517
  vote_button_a,
518
  vote_button_b,
519
- synthesize_speech_button,
520
- ],
521
- )
522
-
523
- # Reload audio player B with audio and set autoplay to True (workaround to play audio back-to-back)
524
- option_a_audio_player.stop(
525
- fn=lambda option_map: gr.update(
526
- value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
527
- autoplay=True,
528
- ),
529
- inputs=[option_map_state],
530
- outputs=[option_b_audio_player],
531
- )
532
-
533
- # Enable voting after second audio option playback finishes
534
- option_b_audio_player.stop(
535
- fn=lambda _: gr.update(autoplay=False),
536
- inputs=[],
537
- outputs=[option_b_audio_player],
538
- )
539
-
540
- logger.debug("Gradio interface built successfully")
541
- return demo
542
-
543
-
544
- if __name__ == "__main__":
545
- logger.info("Launching TTS Arena Gradio app...")
546
- demo = build_gradio_interface()
547
- demo.launch(server_name="0.0.0.0", allowed_paths=[AUDIO_DIR])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Local Application Imports
20
  from src import constants
21
+ from src.config import Config, logger
22
  from src.custom_types import ComparisonType, Option, OptionMap
23
+ from src.database.database import DBSessionMaker
24
  from src.integrations import (
25
  AnthropicError,
26
  ElevenLabsError,
 
39
  )
40
 
41
 
42
+ class App:
43
+ config: Config
44
+ db_session_maker: DBSessionMaker
45
+
46
+ def __init__(self, config: Config, db_session_maker: DBSessionMaker):
47
+ self.config = config
48
+ self.db_session_maker = db_session_maker
49
+
50
+ def generate_text(
51
+ self,
52
+ character_description: str,
53
+ ) -> Tuple[Union[str, gr.update], gr.update]:
54
+ """
55
+ Validates the character_description and generates text using Anthropic API.
56
+
57
+ Args:
58
+ character_description (str): The user-provided text for character description.
59
+
60
+ Returns:
61
+ Tuple containing:
62
+ - The generated text (as a gr.update).
63
+ - An update for the generated text state.
64
+
65
+ Raises:
66
+ gr.Error: On validation or API errors.
67
+ """
68
+ try:
69
+ validate_character_description_length(character_description)
70
+ except ValueError as ve:
71
+ logger.warning(f"Validation error: {ve}")
72
+ raise gr.Error(str(ve))
73
+
74
+ try:
75
+ generated_text = generate_text_with_claude(character_description, self.config)
76
+ logger.info(f"Generated text ({len(generated_text)} characters).")
77
+ return gr.update(value=generated_text), generated_text
78
+ except AnthropicError as ae:
79
+ logger.error(f"AnthropicError while generating text: {ae!s}")
80
+ raise gr.Error(f'There was an issue communicating with the Anthropic API: "{ae.message}"')
81
+ except Exception as e:
82
+ logger.error(f"Unexpected error while generating text: {e}")
83
+ raise gr.Error("Failed to generate text. Please try again later.")
84
+
85
+ def synthesize_speech(
86
+ self,
87
+ character_description: str,
88
+ text: str,
89
+ generated_text_state: str,
90
+ ) -> Tuple[gr.update, gr.update, dict, str, ComparisonType, str, str, bool, str, str]:
91
+ """
92
+ Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
93
+
94
+ This function generates TTS outputs using different providers based on the input text and its modification
95
+ state. Depending on the selected providers, it may:
96
+ - Synthesize one Hume and one ElevenLabs output (50% chance), or
97
+ - Synthesize two Hume outputs (50% chance).
98
+
99
+ The outputs are processed and shuffled, and the corresponding UI components for two audio players are updated.
100
+ Additional metadata such as the generation IDs, comparison type, and state information are also returned.
101
+
102
+ Args:
103
+ character_description (str): The description of the character used for generating the voice.
104
+ text (str): The text content to be synthesized into speech.
105
+ generated_text_state (str): The previously generated text state, used to determine if the text has
106
+ been modified.
107
+
108
+ Returns:
109
+ Tuple containing:
110
+ - gr.update: Update for the first audio player (with autoplay enabled).
111
+ - gr.update: Update for the second audio player.
112
+ - dict: A mapping of option constants to their corresponding TTS providers.
113
+ - str: The raw audio value (relative file path) for option B.
114
+ - ComparisonType: The comparison type between the selected TTS providers.
115
+ - str: Generation ID for option A.
116
+ - str: Generation ID for option B.
117
+ - bool: Flag indicating whether the text was modified.
118
+ - str: The original text that was synthesized.
119
+ - str: The original character description.
120
+
121
+ Raises:
122
+ gr.Error: If any API or unexpected errors occur during the TTS synthesis process.
123
+ """
124
+ if not text:
125
+ logger.warning("Skipping text-to-speech due to empty text.")
126
+ raise gr.Error("Please generate or enter text to synthesize.")
127
+
128
+ # Select 2 TTS providers based on whether the text has been modified.
129
+ text_modified = text != generated_text_state
130
+ provider_a, provider_b = choose_providers(text_modified, character_description)
131
+
132
+ try:
133
+ if provider_b == constants.HUME_AI:
134
+ num_generations = 2
135
+ # If generating 2 Hume outputs, do so in a single API call
136
+ (
137
+ generation_id_a,
138
+ audio_a,
139
+ generation_id_b,
140
+ audio_b,
141
+ ) = text_to_speech_with_hume(character_description, text, num_generations, self.config)
142
+ else:
143
+ with ThreadPoolExecutor(max_workers=2) as executor:
144
+ num_generations = 1
145
+ # Generate a single Hume output
146
+ future_audio_a = executor.submit(
147
+ text_to_speech_with_hume, character_description, text, num_generations, self.config
148
+ )
149
+ # Generate a second TTS output from the second provider
150
+ match provider_b:
151
+ case constants.ELEVENLABS:
152
+ future_audio_b = executor.submit(
153
+ text_to_speech_with_elevenlabs, character_description, text, self.config
154
+ )
155
+ case _:
156
+ # Additional TTS Providers can be added here
157
+ raise ValueError(f"Unsupported provider: {provider_b}")
158
+
159
+ generation_id_a, audio_a = future_audio_a.result()
160
+ generation_id_b, audio_b = future_audio_b.result()
161
+
162
+ # Shuffle options so that placement of options in the UI will always be random
163
+ option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
164
+ option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
165
+ options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
166
+
167
+ option_a_audio = options_map["option_a"]["audio_file_path"]
168
+ option_b_audio = options_map["option_b"]["audio_file_path"]
169
+
170
+ return (
171
+ gr.update(value=option_a_audio, visible=True, autoplay=True),
172
+ gr.update(value=option_b_audio, visible=True),
173
+ options_map,
174
+ text_modified,
175
+ text,
176
+ character_description,
177
+ )
178
+ except ElevenLabsError as ee:
179
+ logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
180
+ raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
181
+ except HumeError as he:
182
+ logger.error(f"HumeError while synthesizing speech from text: {he!s}")
183
+ raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
184
+ except Exception as e:
185
+ logger.error(f"Unexpected error during TTS generation: {e}")
186
+ raise gr.Error("An unexpected error ocurred. Please try again later.")
187
+
188
+ def vote(
189
+ self,
190
+ vote_submitted: bool,
191
+ option_map: OptionMap,
192
+ clicked_option_button: str,
193
+ text_modified: bool,
194
+ character_description: str,
195
+ text: str,
196
+ ) -> Tuple[bool, gr.update, gr.update, gr.update]:
197
+ """
198
+ Handles user voting.
199
+
200
+ Args:
201
+ vote_submitted (bool): True if a vote was already submitted.
202
+ option_map (OptionMap): A dictionary mapping option labels to their details.
203
+ Expected structure:
204
+ {
205
+ 'Option A': 'Hume AI',
206
+ 'Option B': 'ElevenLabs',
207
+ }
208
+ clicked_option_button (str): The button that was clicked.
209
+
210
+ Returns:
211
+ A tuple of:
212
+ - A boolean indicating if the vote was accepted.
213
+ - An update for the selected vote button (showing provider and trophy emoji).
214
+ - An update for the unselected vote button (showing provider).
215
+ - An update for enabling vote interactions.
216
+ """
217
+ if not option_map or vote_submitted:
218
+ return gr.skip(), gr.skip(), gr.skip(), gr.skip()
219
+
220
+ selected_option, other_option = determine_selected_option(clicked_option_button)
221
+ selected_provider = option_map[selected_option]["provider"]
222
+ other_provider = option_map[other_option]["provider"]
223
+
224
+ # Report voting results to be persisted to results DB
225
+ submit_voting_results(
226
+ option_map,
227
+ selected_option,
228
  text_modified,
 
229
  character_description,
230
+ text,
231
+ self.db_session_maker,
232
+ self.config,
233
  )
234
+
235
+ # Build button text, displaying the provider and voice name, appending the trophy emoji to the selected option.
236
+ selected_label = f"{selected_provider} {constants.TROPHY_EMOJI}"
237
+ other_label = f"{other_provider}"
238
+
239
+ return (
240
+ True,
241
+ (
242
+ gr.update(value=selected_label, variant="primary")
243
+ if selected_option == constants.OPTION_A_KEY
244
+ else gr.update(value=other_label, variant="secondary")
245
+ ),
246
+ (
247
+ gr.update(value=other_label, variant="secondary")
248
+ if selected_option == constants.OPTION_A_KEY
249
+ else gr.update(value=selected_label, variant="primary")
250
+ ),
251
+ gr.update(interactive=True),
252
  )
253
+
254
+ def reset_ui(self) -> Tuple[gr.update, gr.update, gr.update, gr.update, None, bool]:
255
+ """
256
+ Resets UI state before generating new text.
257
+
258
+ Returns:
259
+ A tuple of updates for:
260
+ - option_a_audio_player (clear audio)
261
+ - option_b_audio_player (clear audio)
262
+ - vote_button_a (disable and reset button text)
263
+ - vote_button_b (disable and reset button text)
264
+ - option_map_state (reset option map state)
265
+ - vote_submitted_state (reset submitted vote state)
266
+ """
267
+ return (
268
+ gr.update(value=None),
269
+ gr.update(value=None, autoplay=False),
270
+ gr.update(value=constants.SELECT_OPTION_A, variant="secondary"),
271
+ gr.update(value=constants.SELECT_OPTION_B, variant="secondary"),
272
+ None,
273
+ False,
274
  )
275
+
276
+ def build_input_section(self) -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
277
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  Builds the input section including the sample character description dropdown, character
279
  description input, and generate text button.
280
+ """
281
+ sample_character_description_dropdown = gr.Dropdown(
282
+ choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
283
+ label="Choose a sample character description",
284
+ value=None,
285
+ interactive=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  )
287
+ character_description_input = gr.Textbox(
288
+ label="Character Description",
289
+ placeholder="Enter a character description...",
290
+ lines=3,
291
+ max_lines=8,
292
+ max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
293
+ show_copy_button=True,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  )
295
+ generate_text_button = gr.Button("Generate Text", variant="secondary")
296
+ return (
 
297
  sample_character_description_dropdown,
298
  character_description_input,
299
  generate_text_button,
300
+ )
301
 
302
def build_output_section(self) -> Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]:
    """
    Builds the output section including text input, audio players, and vote buttons.

    Components are created in render order: the text box, the synthesize button,
    a row holding the two read-only audio players, then a row holding the two vote
    buttons (created disabled).

    Returns:
        Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]:
            (text_input, synthesize_speech_button, option_a_audio_player,
            option_b_audio_player, vote_button_a, vote_button_b)
    """
    # NOTE(review): the text box reuses CHARACTER_DESCRIPTION_MAX_LENGTH as its limit;
    # confirm this is intended rather than a dedicated text-length constant.
    text_box = gr.Textbox(
        label="Input Text",
        placeholder="Enter or generate text for synthesis...",
        interactive=True,
        autoscroll=False,
        lines=3,
        max_lines=8,
        max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
        show_copy_button=True,
    )
    synthesize_button = gr.Button("Synthesize Speech", variant="primary")

    # Both players are display-only and expose their value as a file path.
    player_kwargs = {"type": "filepath", "interactive": False}
    with gr.Row(equal_height=True):
        player_a = gr.Audio(label=constants.OPTION_A_LABEL, **player_kwargs)
        player_b = gr.Audio(label=constants.OPTION_B_LABEL, **player_kwargs)

    # Vote buttons start out disabled.
    with gr.Row(equal_height=True):
        button_a = gr.Button(constants.SELECT_OPTION_A, interactive=False)
        button_b = gr.Button(constants.SELECT_OPTION_B, interactive=False)

    return (text_box, synthesize_button, player_a, player_b, button_a, button_b)
331
 
332
def build_gradio_interface(self) -> gr.Blocks:
    """
    Builds and configures the Gradio user interface.

    Creates the page title and instructions, the input and output sections, the
    state components, and registers all event handler chains (sample selection,
    text generation, speech synthesis, voting, and back-to-back audio playback).

    Returns:
        gr.Blocks: The fully constructed Gradio UI layout.
    """
    custom_theme = CustomTheme()
    with gr.Blocks(
        title="Expressive TTS Arena",
        theme=custom_theme,
        fill_width=True,
        css_paths="src/assets/styles.css",
    ) as demo:
        # Title & instructions
        gr.Markdown("# Expressive TTS Arena")
        gr.Markdown(
            """
            1. **Choose or enter a character description**: Select a sample from the list or enter your own to guide
               text and voice generation.
            2. **Generate text**: Click **"Generate Text"** to create dialogue based on the character. The generated
               text will appear in the input field automatically—edit it if needed.
            3. **Synthesize speech**: Click **"Synthesize Speech"** to send your text and character description to
               two TTS APIs. Each API generates a voice and synthesizes speech in that voice.
            4. **Listen & compare**: Play both audio options and assess their expressiveness.
            5. **Vote for the best**: Click **"Select Option A"** or **"Select Option B"** to choose the most
               expressive output.
            """
        )

        # Build generate text section
        (
            sample_character_description_dropdown,
            character_description_input,
            generate_text_button,
        ) = self.build_input_section()

        # Build synthesize speech section
        (
            text_input,
            synthesize_speech_button,
            option_a_audio_player,
            option_b_audio_player,
            vote_button_a,
            vote_button_b,
        ) = self.build_output_section()

        # --- UI state components ---

        # Track character description used for text and voice generation
        character_description_state = gr.State("")
        # Track text used for speech synthesis
        text_state = gr.State("")
        # Track generated text state
        generated_text_state = gr.State("")
        # Track whether text that was used was generated or modified/custom
        text_modified_state = gr.State()

        # Track option map (option A and option B are randomized)
        option_map_state = gr.State()

        # Track whether the user has voted for an option
        vote_submitted_state = gr.State(False)

        # --- Register event handlers ---

        # When a sample character description is chosen, update the character description textbox
        sample_character_description_dropdown.change(
            fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
            inputs=[sample_character_description_dropdown],
            outputs=[character_description_input],
        )

        # Generate text button click handler chain:
        # 1. Disable the "Generate text" button
        # 2. Generate text
        # 3. Enable the "Generate text" button
        generate_text_button.click(
            fn=lambda: gr.update(interactive=False),
            inputs=[],
            outputs=[generate_text_button],
        ).then(
            fn=self.generate_text,
            inputs=[character_description_input],
            outputs=[text_input, generated_text_state],
        ).then(
            fn=lambda: gr.update(interactive=True),
            inputs=[],
            outputs=[generate_text_button],
        )

        # Synthesize speech button click event handler chain:
        # 1. Disable the "Synthesize speech" button and both vote buttons
        # 2. Reset UI state (audio players, vote buttons, option map, vote-submitted flag)
        # 3. Synthesize speech and load the audio players and related state
        # 4. Re-enable the "Synthesize speech" button and both vote buttons
        synthesize_speech_button.click(
            fn=lambda: (
                gr.update(interactive=False),
                gr.update(interactive=False),
                gr.update(interactive=False),
            ),
            inputs=[],
            outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
        ).then(
            fn=self.reset_ui,
            inputs=[],
            outputs=[
                option_a_audio_player,
                option_b_audio_player,
                vote_button_a,
                vote_button_b,
                option_map_state,
                vote_submitted_state,
            ],
        ).then(
            fn=self.synthesize_speech,
            inputs=[character_description_input, text_input, generated_text_state],
            outputs=[
                option_a_audio_player,
                option_b_audio_player,
                option_map_state,
                text_modified_state,
                text_state,
                character_description_state,
            ],
        ).then(
            fn=lambda: (
                gr.update(interactive=True),
                gr.update(interactive=True),
                gr.update(interactive=True),
            ),
            inputs=[],
            outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
        )

        # Vote button click event handlers. Both buttons share the same chain:
        # 1. Disable both vote buttons
        # 2. Record the vote; the clicked button itself is passed as an input so
        #    self.vote can tell which option was selected
        for clicked_button in (vote_button_a, vote_button_b):
            clicked_button.click(
                fn=lambda: (
                    gr.update(interactive=False),
                    gr.update(interactive=False),
                ),
                inputs=[],
                outputs=[vote_button_a, vote_button_b],
            ).then(
                fn=self.vote,
                inputs=[
                    vote_submitted_state,
                    option_map_state,
                    clicked_button,
                    text_modified_state,
                    character_description_state,
                    text_state,
                ],
                outputs=[
                    vote_submitted_state,
                    vote_button_a,
                    vote_button_b,
                    synthesize_speech_button,
                ],
            )

        # Reload audio player B with audio and set autoplay to True (workaround to play audio back-to-back).
        # The "?t=<timestamp>" suffix makes the value differ on every reload.
        option_a_audio_player.stop(
            fn=lambda option_map: gr.update(
                value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
                autoplay=True,
            ),
            inputs=[option_map_state],
            outputs=[option_b_audio_player],
        )

        # Turn autoplay back off once the second audio option finishes playing.
        # The handler takes no arguments because the listener is registered with
        # inputs=[]; a one-parameter lambda here would be called with zero
        # arguments and fail.
        option_b_audio_player.stop(
            fn=lambda: gr.update(autoplay=False),
            inputs=[],
            outputs=[option_b_audio_player],
        )

    logger.debug("Gradio interface built successfully")
    return demo
src/config.py CHANGED
@@ -13,71 +13,78 @@ Key Features:
13
  # Standard Library Imports
14
  import logging
15
  import os
 
16
  from pathlib import Path
 
17
 
18
  # Third-Party Library Imports
19
  from dotenv import load_dotenv
20
 
21
- # Determine the environment (defaults to "dev" if not explicitly set)
22
- APP_ENV = os.getenv("APP_ENV", "dev").lower()
23
- if APP_ENV not in {"dev", "prod"}:
24
- APP_ENV = "dev"
25
 
26
-
27
- # In development, load environment variables from .env file (not used in production)
28
- if APP_ENV == "dev" and Path(".env").exists():
29
- load_dotenv(".env", override=True)
30
-
31
-
32
- # Enable debug mode if in development (or if explicitly set in env variables)
33
- DEBUG = APP_ENV == "dev" or os.getenv("DEBUG", "false").lower() == "true"
34
-
35
- # Configure the logger
36
- logging.basicConfig(
37
- level=logging.DEBUG if DEBUG else logging.INFO,
38
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
39
- )
40
  logger: logging.Logger = logging.getLogger("tts_arena")
41
- logger.info(f'App running in "{APP_ENV}" mode.')
42
- logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
43
 
44
- if DEBUG:
45
- logger.debug("DEBUG mode enabled.")
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- # Define the directory for audio files relative to the project root
49
- AUDIO_DIR = Path.cwd() / "static" / "audio"
50
- AUDIO_DIR.mkdir(parents=True, exist_ok=True)
51
- logger.info(f"Audio directory set to {AUDIO_DIR}")
52
-
53
-
54
- def validate_env_var(var_name: str) -> str:
55
- """
56
- Validates that an environment variable is set and returns its value.
57
-
58
- Args:
59
- var_name (str): The name of the environment variable to validate.
60
 
61
- Returns:
62
- str: The value of the environment variable.
63
 
64
- Raises:
65
- ValueError: If the environment variable is not set.
66
 
67
- Examples:
68
- >>> import os
69
- >>> os.environ["EXAMPLE_VAR"] = "example_value"
70
- >>> validate_env_var("EXAMPLE_VAR")
71
- 'example_value'
72
 
73
- >>> validate_env_var("MISSING_VAR")
74
- Traceback (most recent call last):
75
- ...
76
- ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
77
- """
78
- value = os.environ.get(var_name, "")
79
- if not value:
80
- raise ValueError(
81
- f"{var_name} is not set. Please ensure it is defined in your environment variables."
82
  )
83
- return value
 
13
  # Standard Library Imports
14
  import logging
15
  import os
16
+ from dataclasses import dataclass
17
  from pathlib import Path
18
+ from typing import TYPE_CHECKING, ClassVar, Optional
19
 
20
  # Third-Party Library Imports
21
  from dotenv import load_dotenv
22
 
23
+ if TYPE_CHECKING:
24
+ from src.integrations import AnthropicConfig, ElevenLabsConfig, HumeConfig
 
 
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  logger: logging.Logger = logging.getLogger("tts_arena")
 
 
27
 
 
 
28
 
29
@dataclass(frozen=True)
class Config:
    """
    Immutable application configuration, built once and cached on the class.

    Use `Config.get()` to obtain the shared instance; the first call reads the
    environment, configures logging, and creates the audio directory.
    """

    # Cached singleton instance; populated by the first call to `get()`.
    _config: ClassVar[Optional["Config"]] = None
    # Either "dev" or "prod" (any other APP_ENV value falls back to "dev").
    app_env: str
    # True in dev, or when the DEBUG environment variable equals "true".
    debug: bool
    # Value of the DATABASE_URL environment variable, if set.
    database_url: Optional[str]
    # Directory where audio files are stored (<cwd>/static/audio).
    audio_dir: Path
    anthropic_config: "AnthropicConfig"
    hume_config: "HumeConfig"
    elevenlabs_config: "ElevenLabsConfig"

    @classmethod
    def get(cls) -> "Config":
        """
        Returns the cached configuration, building it on first use.

        Returns:
            Config: The shared configuration instance.
        """
        if cls._config is None:
            cls._config = Config._init()
        return cls._config

    @staticmethod
    def _init() -> "Config":
        """
        Reads the environment and constructs a new Config.

        Side effects: may load `.env` (dev only), calls `logging.basicConfig`,
        creates the audio directory, and emits startup log messages.

        Returns:
            Config: A freshly built configuration instance.
        """
        env_name = os.getenv("APP_ENV", "dev").lower()
        if env_name not in {"dev", "prod"}:
            env_name = "dev"

        # In development, load environment variables from .env file (not used in production)
        dotenv_file = Path(".env")
        if env_name == "dev" and dotenv_file.exists():
            load_dotenv(".env", override=True)

        # Enable debug mode if in development (or if explicitly set in env variables)
        debug_flag_set = os.getenv("DEBUG", "false").lower() == "true"
        debug_enabled = env_name == "dev" or debug_flag_set

        db_url = os.getenv("DATABASE_URL")

        # Configure the logger
        log_level = logging.DEBUG if debug_enabled else logging.INFO
        logging.basicConfig(
            level=log_level,
            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        )
        logger.info(f'App running in "{env_name}" mode.')
        logger.info(f"Debug mode is {'enabled' if debug_enabled else 'disabled'}.")

        # Define the directory for audio files relative to the project root
        audio_path = Path.cwd() / "static" / "audio"
        audio_path.mkdir(parents=True, exist_ok=True)

        logger.info(f"Audio directory set to {audio_path}")

        if debug_enabled:
            logger.debug("DEBUG mode enabled.")

        # Imported here rather than at module level: the integration modules
        # themselves import from src.config.
        from src.integrations import AnthropicConfig, ElevenLabsConfig, HumeConfig

        return Config(
            app_env=env_name,
            debug=debug_enabled,
            database_url=db_url,
            audio_dir=audio_path,
            anthropic_config=AnthropicConfig(),
            hume_config=HumeConfig(),
            elevenlabs_config=ElevenLabsConfig(),
        )
 
src/database/__init__.py CHANGED
@@ -1,9 +1,10 @@
1
  from .crud import create_vote
2
- from .database import Base, SessionLocal, engine
3
 
4
  __all__ = [
5
  "Base",
6
- "SessionLocal",
7
  "create_vote",
8
- "engine"
 
9
  ]
 
1
  from .crud import create_vote
2
+ from .database import Base, DBSessionMaker, engine, init_db
3
 
4
  __all__ = [
5
  "Base",
6
+ "DBSessionMaker",
7
  "create_vote",
8
+ "engine",
9
+ "init_db",
10
  ]
src/database/database.py CHANGED
@@ -9,65 +9,71 @@ If no DATABASE_URL environment variable is set, then create a dummy database to
9
  """
10
 
11
  # Standard Library Imports
12
- import os
13
 
14
  # Third-Party Library Imports
15
- from sqlalchemy import create_engine
16
  from sqlalchemy.orm import declarative_base, sessionmaker
17
 
18
  # Local Application Imports
19
- from src.config import APP_ENV
20
-
21
- DATABASE_URL = os.getenv("DATABASE_URL")
22
-
23
- if APP_ENV == "prod":
24
- # In production, a valid DATABASE_URL is required.
25
- if not DATABASE_URL:
26
- raise ValueError("DATABASE_URL must be set in production!")
27
-
28
- engine = create_engine(DATABASE_URL)
29
- SessionLocal = sessionmaker(bind=engine)
30
- # In development, if a DATABASE_URL is provided, use it.
31
- elif DATABASE_URL:
32
- engine = create_engine(DATABASE_URL)
33
- SessionLocal = sessionmaker(bind=engine)
34
- else:
35
- # No DATABASE_URL is provided; use a DummySession that does nothing.
36
- engine = None
37
 
38
- class DummySession:
39
- is_dummy = True # Flag to indicate this is a dummy session.
40
 
41
- def __enter__(self):
42
- return self
43
 
44
- def __exit__(self, exc_type, exc_value, traceback):
45
- pass
46
 
47
- def add(self, _instance, _warn=True):
48
- # No-op: simply ignore adding the instance.
49
- pass
50
 
51
- def commit(self):
52
- # Raise an exception to simulate failure when attempting a write.
53
- raise RuntimeError("DummySession does not support commit operations.")
54
 
55
- def refresh(self, _instance):
56
- # Raise an exception to simulate failure when attempting to refresh.
57
- raise RuntimeError("DummySession does not support refresh operations.")
58
 
59
- def rollback(self):
60
- # No-op: there's nothing to roll back.
61
- pass
62
 
63
- def close(self):
64
- # No-op: nothing to close.
65
- pass
66
 
67
- def dummy_session_factory():
68
- return DummySession()
 
69
 
70
- SessionLocal = dummy_session_factory
71
 
72
- # Declarative base class for ORM models.
73
  Base = declarative_base()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  """
10
 
11
  # Standard Library Imports
12
+ from typing import Callable, Optional
13
 
14
  # Third-Party Library Imports
15
+ from sqlalchemy import Engine, create_engine
16
  from sqlalchemy.orm import declarative_base, sessionmaker
17
 
18
  # Local Application Imports
19
+ from src.config import Config
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
 
 
21
 
22
class DummySession:
    """
    Stand-in session object used when no database is configured.

    Supports use as a context manager. `add`, `rollback`, and `close` do nothing;
    `commit` and `refresh` raise RuntimeError so that attempted writes fail loudly.
    """

    # Flag to indicate this is a dummy session.
    is_dummy = True

    def __enter__(self):
        """Returns this session so it can be bound by a `with` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Does nothing on exit; returns None, so exceptions are not suppressed."""
        return None

    def add(self, _instance, _warn=True):
        """No-op: simply ignores the instance being added."""
        return None

    def commit(self):
        """Always raises, to simulate failure when attempting a write."""
        raise RuntimeError("DummySession does not support commit operations.")

    def refresh(self, _instance):
        """Always raises, to simulate failure when attempting to refresh."""
        raise RuntimeError("DummySession does not support refresh operations.")

    def rollback(self):
        """No-op: there's nothing to roll back."""
        return None

    def close(self):
        """No-op: nothing to close."""
        return None
50
 
 
51
 
 
52
  Base = declarative_base()
53
+ engine: Optional[Engine] = None
54
+
55
+ DBSessionMaker = sessionmaker | Callable[[], DummySession]
56
+
57
+
58
def init_db(config: Config) -> DBSessionMaker:
    """
    Initializes the module-level engine and returns a session factory.

    Args:
        config (Config): Application configuration; `app_env` and `database_url` are read.

    Returns:
        DBSessionMaker: A SQLAlchemy `sessionmaker` bound to a new engine when a
            database URL is configured, otherwise a callable that returns DummySession objects.

    Raises:
        ValueError: If running in production without a database URL.
    """
    # ruff doesn't like setting global variables, but this is practical here
    global engine  # noqa

    db_url = config.database_url

    if db_url:
        # A URL is available (required in production, optional in development): use it.
        engine = create_engine(db_url)
        return sessionmaker(bind=engine)

    if config.app_env == "prod":
        # In production, a valid DATABASE_URL is required.
        raise ValueError("DATABASE_URL must be set in production!")

    # No DATABASE_URL is provided; use a DummySession that does nothing.
    engine = None

    def dummy_session_factory():
        return DummySession()

    return dummy_session_factory
src/database/models.py CHANGED
@@ -34,6 +34,7 @@ class OptionEnum(str, Enum):
34
  OPTION_A = "option_a"
35
  OPTION_B = "option_b"
36
 
 
37
  class VoteResult(Base):
38
  __tablename__ = "vote_results"
39
 
 
34
  OPTION_A = "option_a"
35
  OPTION_B = "option_b"
36
 
37
+
38
  class VoteResult(Base):
39
  __tablename__ = "vote_results"
40
 
src/integrations/__init__.py CHANGED
@@ -1,12 +1,15 @@
1
- from .anthropic_api import AnthropicError, generate_text_with_claude
2
- from .elevenlabs_api import ElevenLabsError, text_to_speech_with_elevenlabs
3
- from .hume_api import HumeError, text_to_speech_with_hume
4
 
5
  __all__ = [
 
6
  "AnthropicError",
 
7
  "ElevenLabsError",
 
8
  "HumeError",
9
  "generate_text_with_claude",
10
  "text_to_speech_with_elevenlabs",
11
- "text_to_speech_with_hume"
12
  ]
 
1
+ from .anthropic_api import AnthropicConfig, AnthropicError, generate_text_with_claude
2
+ from .elevenlabs_api import ElevenLabsConfig, ElevenLabsError, text_to_speech_with_elevenlabs
3
+ from .hume_api import HumeConfig, HumeError, text_to_speech_with_hume
4
 
5
  __all__ = [
6
+ "AnthropicConfig",
7
  "AnthropicError",
8
+ "ElevenLabsConfig",
9
  "ElevenLabsError",
10
+ "HumeConfig",
11
  "HumeError",
12
  "generate_text_with_claude",
13
  "text_to_speech_with_elevenlabs",
14
+ "text_to_speech_with_hume",
15
  ]
src/integrations/anthropic_api.py CHANGED
@@ -29,9 +29,9 @@ from anthropic.types import Message, ModelParam, TextBlock
29
  from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
30
 
31
  # Local Application Imports
32
- from src.config import logger, validate_env_var
33
  from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
34
- from src.utils import truncate_text
35
 
36
 
37
  @dataclass(frozen=True)
@@ -140,10 +140,6 @@ class UnretryableAnthropicError(AnthropicError):
140
  super().__init__(message, original_exception)
141
 
142
 
143
- # Initialize the Anthropic client
144
- anthropic_config = AnthropicConfig()
145
-
146
-
147
  @retry(
148
  stop=stop_after_attempt(3),
149
  wait=wait_fixed(2),
@@ -151,7 +147,7 @@ anthropic_config = AnthropicConfig()
151
  after=after_log(logger, logging.DEBUG),
152
  reraise=True,
153
  )
154
- def generate_text_with_claude(character_description: str) -> str:
155
  """
156
  Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
157
 
@@ -165,10 +161,9 @@ def generate_text_with_claude(character_description: str) -> str:
165
  AnthropicError: If there is an error communicating with the Anthropic API.
166
  """
167
  # Build prompt for claude with character description
 
168
  prompt = anthropic_config.build_expressive_prompt(character_description)
169
- logger.debug(
170
- f"Generating text with Claude. Character description length: {len(prompt)} characters."
171
- )
172
 
173
  response = None
174
  try:
@@ -189,27 +184,20 @@ def generate_text_with_claude(character_description: str) -> str:
189
  # Process response
190
  blocks: Union[List[TextBlock], TextBlock, None] = response.content
191
  if isinstance(blocks, list):
192
- result = "\n\n".join(
193
- block.text for block in blocks if isinstance(block, TextBlock)
194
- )
195
  logger.debug(f"Processed response from list: {truncate_text(result)}")
196
  return result
197
  if isinstance(blocks, TextBlock):
198
- logger.debug(
199
- f"Processed response from single TextBlock: {truncate_text(blocks.text)}"
200
- )
201
  return blocks.text
202
 
203
  logger.warning(f"Unexpected response type: {type(blocks)}")
204
  return str(blocks or "No content generated.")
205
 
206
  except Exception as e:
207
- if (
208
- isinstance(e, APIError)
209
- and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE
210
- ):
211
  raise UnretryableAnthropicError(
212
- message=f"\"{e.body['error']['message']}\"",
213
  original_exception=e,
214
  ) from e
215
 
 
29
  from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
30
 
31
  # Local Application Imports
32
+ from src.config import Config, logger
33
  from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
34
+ from src.utils import truncate_text, validate_env_var
35
 
36
 
37
  @dataclass(frozen=True)
 
140
  super().__init__(message, original_exception)
141
 
142
 
 
 
 
 
143
  @retry(
144
  stop=stop_after_attempt(3),
145
  wait=wait_fixed(2),
 
147
  after=after_log(logger, logging.DEBUG),
148
  reraise=True,
149
  )
150
+ def generate_text_with_claude(character_description: str, config: Config) -> str:
151
  """
152
  Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
153
 
 
161
  AnthropicError: If there is an error communicating with the Anthropic API.
162
  """
163
  # Build prompt for claude with character description
164
+ anthropic_config = config.anthropic_config
165
  prompt = anthropic_config.build_expressive_prompt(character_description)
166
+ logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
 
 
167
 
168
  response = None
169
  try:
 
184
  # Process response
185
  blocks: Union[List[TextBlock], TextBlock, None] = response.content
186
  if isinstance(blocks, list):
187
+ result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
 
 
188
  logger.debug(f"Processed response from list: {truncate_text(result)}")
189
  return result
190
  if isinstance(blocks, TextBlock):
191
+ logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
 
 
192
  return blocks.text
193
 
194
  logger.warning(f"Unexpected response type: {type(blocks)}")
195
  return str(blocks or "No content generated.")
196
 
197
  except Exception as e:
198
+ if isinstance(e, APIError) and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE:
 
 
 
199
  raise UnretryableAnthropicError(
200
+ message=f'"{e.body["error"]["message"]}"',
201
  original_exception=e,
202
  ) from e
203
 
src/integrations/elevenlabs_api.py CHANGED
@@ -31,9 +31,9 @@ from elevenlabs.core import ApiError
31
  from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
32
 
33
  # Local Application Imports
34
- from src.config import logger, validate_env_var
35
  from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
36
- from src.utils import save_base64_audio_to_file
37
 
38
 
39
  @dataclass(frozen=True)
@@ -76,10 +76,6 @@ class UnretryableElevenLabsError(ElevenLabsError):
76
  super().__init__(message, original_exception)
77
 
78
 
79
- # Initialize the ElevenLabs client
80
- elevenlabs_config = ElevenLabsConfig()
81
-
82
-
83
  @retry(
84
  stop=stop_after_attempt(3),
85
  wait=wait_fixed(2),
@@ -87,9 +83,7 @@ elevenlabs_config = ElevenLabsConfig()
87
  after=after_log(logger, logging.DEBUG),
88
  reraise=True,
89
  )
90
- def text_to_speech_with_elevenlabs(
91
- character_description: str, text: str
92
- ) -> Tuple[None, str]:
93
  """
94
  Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
95
 
@@ -106,9 +100,9 @@ def text_to_speech_with_elevenlabs(
106
  Raises:
107
  ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
108
  """
109
- logger.debug(
110
- f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters."
111
- )
112
 
113
  try:
114
  # Synthesize speech using the ElevenLabs SDK
@@ -129,16 +123,13 @@ def text_to_speech_with_elevenlabs(
129
  generated_voice_id = preview.generated_voice_id
130
  base64_audio = preview.audio_base_64
131
  filename = f"{generated_voice_id}.mp3"
132
- audio_file_path = save_base64_audio_to_file(base64_audio, filename)
133
 
134
  # Write audio to file and return the relative path
135
  return None, audio_file_path
136
 
137
  except Exception as e:
138
- if (
139
- isinstance(e, ApiError)
140
- and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE
141
- ):
142
  raise UnretryableElevenLabsError(
143
  message=f"{e.body['detail']['message']}",
144
  original_exception=e,
 
31
  from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
32
 
33
  # Local Application Imports
34
+ from src.config import Config, logger
35
  from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
36
+ from src.utils import save_base64_audio_to_file, validate_env_var
37
 
38
 
39
  @dataclass(frozen=True)
 
76
  super().__init__(message, original_exception)
77
 
78
 
 
 
 
 
79
  @retry(
80
  stop=stop_after_attempt(3),
81
  wait=wait_fixed(2),
 
83
  after=after_log(logger, logging.DEBUG),
84
  reraise=True,
85
  )
86
+ def text_to_speech_with_elevenlabs(character_description: str, text: str, config: Config) -> Tuple[None, str]:
 
 
87
  """
88
  Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
89
 
 
100
  Raises:
101
  ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
102
  """
103
+ logger.debug(f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters.")
104
+
105
+ elevenlabs_config = config.elevenlabs_config
106
 
107
  try:
108
  # Synthesize speech using the ElevenLabs SDK
 
123
  generated_voice_id = preview.generated_voice_id
124
  base64_audio = preview.audio_base_64
125
  filename = f"{generated_voice_id}.mp3"
126
+ audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
127
 
128
  # Write audio to file and return the relative path
129
  return None, audio_file_path
130
 
131
  except Exception as e:
132
+ if isinstance(e, ApiError) and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE:
 
 
 
133
  raise UnretryableElevenLabsError(
134
  message=f"{e.body['detail']['message']}",
135
  original_exception=e,
src/integrations/hume_api.py CHANGED
@@ -29,9 +29,9 @@ from requests.exceptions import HTTPError
29
  from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
30
 
31
  # Local Application Imports
32
- from src.config import logger, validate_env_var
33
  from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
34
- from src.utils import save_base64_audio_to_file
35
 
36
  HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
37
  """ Support audio file formats for the Hume TTS API"""
@@ -85,7 +85,6 @@ class UnretryableHumeError(HumeError):
85
 
86
 
87
  # Initialize the Hume client
88
- hume_config = HumeConfig()
89
 
90
 
91
  @retry(
@@ -96,7 +95,7 @@ hume_config = HumeConfig()
96
  reraise=True,
97
  )
98
  def text_to_speech_with_hume(
99
- character_description: str, text: str, num_generations: int = 1
100
  ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
101
  """
102
  Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
@@ -135,6 +134,7 @@ def text_to_speech_with_hume(
135
  if num_generations < 1 or num_generations > 2:
136
  raise ValueError("Invalid number of generations specified. Must be 1 or 2.")
137
 
 
138
  request_body = {
139
  "utterances": [{"text": text, "description": character_description or None}],
140
  "format": {
@@ -161,20 +161,17 @@ def text_to_speech_with_hume(
161
 
162
  # Extract the base64 encoded audio and generation ID from the generation
163
  generation_a = generations[0]
164
- generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a)
165
 
166
  if num_generations == 1:
167
  return (generation_a_id, audio_a_path)
168
 
169
  generation_b = generations[1]
170
- generation_b_id, audio_b_path = parse_hume_tts_generation(generation_b)
171
  return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
172
 
173
  except Exception as e:
174
- if (
175
- isinstance(e, HTTPError)
176
- and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE
177
- ):
178
  raise UnretryableHumeError(
179
  message=f"{e.response.text}",
180
  original_exception=e,
@@ -186,7 +183,7 @@ def text_to_speech_with_hume(
186
  ) from e
187
 
188
 
189
- def parse_hume_tts_generation(generation: Dict[str, Any]) -> Tuple[str, str]:
190
  """
191
  Parse a Hume TTS generation response and save the decoded audio as an MP3 file.
192
 
@@ -220,5 +217,5 @@ def parse_hume_tts_generation(generation: Dict[str, Any]) -> Tuple[str, str]:
220
  raise KeyError("The generation dictionary is missing the 'audio' key.")
221
 
222
  filename = f"{generation_id}.mp3"
223
- audio_file_path = save_base64_audio_to_file(base64_audio, filename)
224
  return generation_id, audio_file_path
 
29
  from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
30
 
31
  # Local Application Imports
32
+ from src.config import Config, logger
33
  from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
34
+ from src.utils import save_base64_audio_to_file, validate_env_var
35
 
36
  HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
37
  """ Support audio file formats for the Hume TTS API"""
 
85
 
86
 
87
  # Initialize the Hume client
 
88
 
89
 
90
  @retry(
 
95
  reraise=True,
96
  )
97
  def text_to_speech_with_hume(
98
+ character_description: str, text: str, num_generations: int, config: Config
99
  ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
100
  """
101
  Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
 
134
  if num_generations < 1 or num_generations > 2:
135
  raise ValueError("Invalid number of generations specified. Must be 1 or 2.")
136
 
137
+ hume_config = config.hume_config
138
  request_body = {
139
  "utterances": [{"text": text, "description": character_description or None}],
140
  "format": {
 
161
 
162
  # Extract the base64 encoded audio and generation ID from the generation
163
  generation_a = generations[0]
164
+ generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
165
 
166
  if num_generations == 1:
167
  return (generation_a_id, audio_a_path)
168
 
169
  generation_b = generations[1]
170
+ generation_b_id, audio_b_path = parse_hume_tts_generation(generation_b, config)
171
  return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
172
 
173
  except Exception as e:
174
+ if isinstance(e, HTTPError) and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE:
 
 
 
175
  raise UnretryableHumeError(
176
  message=f"{e.response.text}",
177
  original_exception=e,
 
183
  ) from e
184
 
185
 
186
+ def parse_hume_tts_generation(generation: Dict[str, Any], config: Config) -> Tuple[str, str]:
187
  """
188
  Parse a Hume TTS generation response and save the decoded audio as an MP3 file.
189
 
 
217
  raise KeyError("The generation dictionary is missing the 'audio' key.")
218
 
219
  filename = f"{generation_id}.mp3"
220
+ audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
221
  return generation_id, audio_file_path
src/main.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ main.py
3
+
4
+ This module is the entry point for the app. It loads configuration and starts the Gradio app.
5
+ """
6
+
7
+ from src.app import App
8
+ from src.config import Config, logger
9
+ from src.database.database import init_db
10
+
11
if __name__ == "__main__":
    # Load (and cache) the application configuration.
    config = Config.get()
    logger.info("Launching TTS Arena Gradio app...")

    # Initialize the database exactly once. A second init_db() call would rebind the
    # module-level engine to a fresh Engine and throw away the session factory it returns.
    db_session_maker = init_db(config)

    app = App(config, db_session_maker)
    demo = app.build_gradio_interface()

    # allowed_paths lets Gradio serve the generated audio files from the audio directory.
    demo.launch(server_name="0.0.0.0", allowed_paths=[str(config.audio_dir)])
src/utils.py CHANGED
@@ -8,14 +8,17 @@ These functions provide reusable logic to simplify code in other modules.
8
  # Standard Library Imports
9
  import base64
10
  import json
 
11
  import random
12
  import time
13
  from pathlib import Path
14
- from typing import Tuple
 
 
15
 
16
  # Local Application Imports
17
  from src import constants
18
- from src.config import APP_ENV, AUDIO_DIR, logger
19
  from src.custom_types import (
20
  ComparisonType,
21
  Option,
@@ -24,7 +27,8 @@ from src.custom_types import (
24
  TTSProviderName,
25
  VotingResults,
26
  )
27
- from src.database import SessionLocal, crud
 
28
 
29
 
30
  def truncate_text(text: str, max_length: int = 50) -> str:
@@ -77,9 +81,7 @@ def validate_character_description_length(character_description: str) -> None:
77
  stripped_character_description = character_description.strip()
78
  character_description_length = len(stripped_character_description)
79
 
80
- logger.debug(
81
- f"Voice description length being validated: {character_description_length} characters"
82
- )
83
 
84
  if character_description_length < constants.CHARACTER_DESCRIPTION_MIN_LENGTH:
85
  raise ValueError(
@@ -95,9 +97,7 @@ def validate_character_description_length(character_description: str) -> None:
95
  )
96
 
97
  truncated_description = truncate_text(stripped_character_description)
98
- logger.debug(
99
- f"Character description length validation passed for character_description: {truncated_description}"
100
- )
101
 
102
 
103
  def delete_files_older_than(directory: str, minutes: int = 30) -> None:
@@ -133,7 +133,7 @@ def delete_files_older_than(directory: str, minutes: int = 30) -> None:
133
  logger.exception(f"Error deleting {file_path}: {e}")
134
 
135
 
136
- def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
137
  """
138
  Decode a base64-encoded audio string and write the resulting binary data to a file
139
  within the preconfigured AUDIO_DIR directory. Prior to writing the bytes to an audio
@@ -158,11 +158,11 @@ def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
158
  audio_bytes = base64.b64decode(base64_audio)
159
 
160
  # Construct the full absolute file path within the AUDIO_DIR directory using Path.
161
- file_path = Path(AUDIO_DIR) / filename
162
 
163
  # Delete all audio files older than 30 minutes before writing the new audio file.
164
  num_minutes = 30
165
- delete_files_older_than(AUDIO_DIR, num_minutes)
166
 
167
  # Write the binary audio data to the file.
168
  with file_path.open("wb") as audio_file:
@@ -204,11 +204,7 @@ def choose_providers(
204
  hume_comparison_only = text_modified or not character_description
205
 
206
  provider_a = constants.HUME_AI
207
- provider_b = (
208
- constants.HUME_AI
209
- if hume_comparison_only
210
- else random.choice(constants.TTS_PROVIDERS)
211
- )
212
 
213
  return provider_a, provider_b
214
 
@@ -277,10 +273,8 @@ def determine_selected_option(
277
 
278
  return selected_option, other_option
279
 
280
- def determine_comparison_type(
281
- provider_a: TTSProviderName,
282
- provider_b: TTSProviderName
283
- ) -> ComparisonType:
284
  """
285
  Determine the comparison type based on the given TTS provider names.
286
 
@@ -311,7 +305,7 @@ def log_voting_results(voting_results: VotingResults) -> None:
311
  logger.info("Voting results:\n%s", json.dumps(voting_results, indent=4))
312
 
313
 
314
- def handle_vote_failure(e: Exception, voting_results: VotingResults, is_dummy_db_session: bool) -> None:
315
  """
316
  Handles logging when creating a vote record fails.
317
 
@@ -322,22 +316,42 @@ def handle_vote_failure(e: Exception, voting_results: VotingResults, is_dummy_db
322
  In development with a dummy session:
323
  - Only logs the voting results.
324
  """
325
- if APP_ENV == "prod" or (APP_ENV == "dev" and not is_dummy_db_session):
326
- logger.error("Failed to create vote record: %s", e, exc_info=(APP_ENV == "prod"))
327
  log_voting_results(voting_results)
328
- if APP_ENV == "prod":
329
  raise e
330
  else:
331
  # Dev mode with a dummy session: only log the voting results.
332
  log_voting_results(voting_results)
333
 
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  def submit_voting_results(
336
  option_map: OptionMap,
337
  selected_option: str,
338
  text_modified: bool,
339
  character_description: str,
340
  text: str,
 
 
341
  ) -> None:
342
  """
343
  Constructs the voting results dictionary from the provided inputs,
@@ -367,17 +381,34 @@ def submit_voting_results(
367
  "is_custom_text": text_modified,
368
  }
369
 
370
- db = SessionLocal()
371
- is_dummy_db_session = getattr(db, "is_dummy", False)
372
- try:
373
- crud.create_vote(db, voting_results)
374
- except Exception as e:
375
- handle_vote_failure(e, voting_results, is_dummy_db_session)
376
- else:
377
- logger.info("Vote record created successfully.")
378
- if APP_ENV == "dev":
379
- log_voting_results(voting_results)
380
- finally:
381
- db.close()
382
 
 
 
383
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  # Standard Library Imports
9
  import base64
10
  import json
11
+ import os
12
  import random
13
  import time
14
  from pathlib import Path
15
+ from typing import Tuple, cast
16
+
17
+ from sqlalchemy.orm import Session
18
 
19
  # Local Application Imports
20
  from src import constants
21
+ from src.config import Config, logger
22
  from src.custom_types import (
23
  ComparisonType,
24
  Option,
 
27
  TTSProviderName,
28
  VotingResults,
29
  )
30
+ from src.database import crud
31
+ from src.database.database import DBSessionMaker
32
 
33
 
34
  def truncate_text(text: str, max_length: int = 50) -> str:
 
81
  stripped_character_description = character_description.strip()
82
  character_description_length = len(stripped_character_description)
83
 
84
+ logger.debug(f"Voice description length being validated: {character_description_length} characters")
 
 
85
 
86
  if character_description_length < constants.CHARACTER_DESCRIPTION_MIN_LENGTH:
87
  raise ValueError(
 
97
  )
98
 
99
  truncated_description = truncate_text(stripped_character_description)
100
+ logger.debug(f"Character description length validation passed for character_description: {truncated_description}")
 
 
101
 
102
 
103
  def delete_files_older_than(directory: str, minutes: int = 30) -> None:
 
133
  logger.exception(f"Error deleting {file_path}: {e}")
134
 
135
 
136
+ def save_base64_audio_to_file(base64_audio: str, filename: str, config: Config) -> str:
137
  """
138
  Decode a base64-encoded audio string and write the resulting binary data to a file
139
  within the preconfigured AUDIO_DIR directory. Prior to writing the bytes to an audio
 
158
  audio_bytes = base64.b64decode(base64_audio)
159
 
160
  # Construct the full absolute file path within the AUDIO_DIR directory using Path.
161
+ file_path = Path(config.audio_dir) / filename
162
 
163
  # Delete all audio files older than 30 minutes before writing the new audio file.
164
  num_minutes = 30
165
+ delete_files_older_than(config.audio_dir, num_minutes)
166
 
167
  # Write the binary audio data to the file.
168
  with file_path.open("wb") as audio_file:
 
204
  hume_comparison_only = text_modified or not character_description
205
 
206
  provider_a = constants.HUME_AI
207
+ provider_b = constants.HUME_AI if hume_comparison_only else random.choice(constants.TTS_PROVIDERS)
 
 
 
 
208
 
209
  return provider_a, provider_b
210
 
 
273
 
274
  return selected_option, other_option
275
 
276
+
277
+ def determine_comparison_type(provider_a: TTSProviderName, provider_b: TTSProviderName) -> ComparisonType:
 
 
278
  """
279
  Determine the comparison type based on the given TTS provider names.
280
 
 
305
  logger.info("Voting results:\n%s", json.dumps(voting_results, indent=4))
306
 
307
 
308
+ def handle_vote_failure(e: Exception, voting_results: VotingResults, is_dummy_db_session: bool, config: Config) -> None:
309
  """
310
  Handles logging when creating a vote record fails.
311
 
 
316
  In development with a dummy session:
317
  - Only logs the voting results.
318
  """
319
+ if config.app_env == "prod" or (config.app_env == "dev" and not is_dummy_db_session):
320
+ logger.error("Failed to create vote record: %s", e, exc_info=(config.app_env == "prod"))
321
  log_voting_results(voting_results)
322
+ if config.app_env == "prod":
323
  raise e
324
  else:
325
  # Dev mode with a dummy session: only log the voting results.
326
  log_voting_results(voting_results)
327
 
328
 
329
def _persist_vote(db_session_maker: DBSessionMaker, voting_results: VotingResults, config: Config) -> None:
    """
    Persist a vote using a session obtained from ``db_session_maker``.

    When the session is a dummy (it exposes a truthy ``is_dummy`` attribute), no
    write is attempted: the voting results are only logged and the function
    returns. Otherwise the vote is written with ``crud.create_vote``; failures are
    delegated to ``handle_vote_failure`` and, on success in the "dev" environment,
    the voting results are logged as well.

    The session is always closed before returning.

    Args:
        db_session_maker (DBSessionMaker): Callable that returns a database session.
        voting_results (VotingResults): The voting results to persist.
        config (Config): Application configuration; ``config.app_env`` is read here.

    Raises:
        Exception: Whatever ``handle_vote_failure`` re-raises for a failed write.
    """
    db = db_session_maker()
    is_dummy_db_session = getattr(db, "is_dummy", False)
    try:
        if is_dummy_db_session:
            # Nothing can be written through a dummy session, so log and stop
            # here instead of falling through to the real write path.
            logger.info("Vote record created successfully.")
            log_voting_results(voting_results)
            return

        try:
            crud.create_vote(cast(Session, db), voting_results)
        except Exception as e:
            handle_vote_failure(e, voting_results, is_dummy_db_session, config)
        else:
            logger.info("Vote record created successfully.")
            if config.app_env == "dev":
                log_voting_results(voting_results)
    finally:
        # Runs on every path, including the early return for dummy sessions.
        db.close()
345
+
346
+
347
  def submit_voting_results(
348
  option_map: OptionMap,
349
  selected_option: str,
350
  text_modified: bool,
351
  character_description: str,
352
  text: str,
353
+ db_session_maker: DBSessionMaker,
354
+ config: Config,
355
  ) -> None:
356
  """
357
  Constructs the voting results dictionary from the provided inputs,
 
381
  "is_custom_text": text_modified,
382
  }
383
 
384
+ _persist_vote(db_session_maker, voting_results, config)
385
+
386
+
387
def validate_env_var(var_name: str) -> str:
    """
    Return the value of a required environment variable.

    Args:
        var_name (str): The name of the environment variable to look up.

    Returns:
        str: The non-empty value of the environment variable.

    Raises:
        ValueError: If the environment variable is unset or set to an empty string.

    Examples:
        >>> import os
        >>> os.environ["EXAMPLE_VAR"] = "example_value"
        >>> validate_env_var("EXAMPLE_VAR")
        'example_value'

        >>> validate_env_var("MISSING_VAR")
        Traceback (most recent call last):
          ...
        ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
    """
    env_value = os.getenv(var_name)

    # A missing variable (None) and an empty string are both treated as "not set".
    if env_value:
        return env_value

    raise ValueError(f"{var_name} is not set. Please ensure it is defined in your environment variables.")
uv.lock CHANGED
@@ -167,7 +167,7 @@ name = "click"
167
  version = "8.1.8"
168
  source = { registry = "https://pypi.org/simple" }
169
  dependencies = [
170
- { name = "colorama", marker = "sys_platform == 'win32'" },
171
  ]
172
  sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
173
  wheels = [
@@ -237,6 +237,7 @@ dependencies = [
237
  dev = [
238
  { name = "mypy" },
239
  { name = "pre-commit" },
 
240
  { name = "pytest" },
241
  { name = "ruff" },
242
  { name = "watchfiles" },
@@ -258,6 +259,7 @@ requires-dist = [
258
  dev = [
259
  { name = "mypy", specifier = ">=1.15.0" },
260
  { name = "pre-commit", specifier = ">=4.1.0" },
 
261
  { name = "pytest", specifier = ">=8.3.4" },
262
  { name = "ruff", specifier = ">=0.9.5" },
263
  { name = "watchfiles", specifier = ">=1.0.4" },
@@ -966,6 +968,19 @@ wheels = [
966
  { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
967
  ]
968
 
 
 
 
 
 
 
 
 
 
 
 
 
 
969
  [[package]]
970
  name = "pytest"
971
  version = "8.3.4"
@@ -1228,7 +1243,7 @@ name = "tqdm"
1228
  version = "4.67.1"
1229
  source = { registry = "https://pypi.org/simple" }
1230
  dependencies = [
1231
- { name = "colorama", marker = "sys_platform == 'win32'" },
1232
  ]
1233
  sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
1234
  wheels = [
 
167
  version = "8.1.8"
168
  source = { registry = "https://pypi.org/simple" }
169
  dependencies = [
170
+ { name = "colorama", marker = "platform_system == 'Windows'" },
171
  ]
172
  sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
173
  wheels = [
 
237
  dev = [
238
  { name = "mypy" },
239
  { name = "pre-commit" },
240
+ { name = "pyright" },
241
  { name = "pytest" },
242
  { name = "ruff" },
243
  { name = "watchfiles" },
 
259
  dev = [
260
  { name = "mypy", specifier = ">=1.15.0" },
261
  { name = "pre-commit", specifier = ">=4.1.0" },
262
+ { name = "pyright", specifier = ">=1.1.394" },
263
  { name = "pytest", specifier = ">=8.3.4" },
264
  { name = "ruff", specifier = ">=0.9.5" },
265
  { name = "watchfiles", specifier = ">=1.0.4" },
 
968
  { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
969
  ]
970
 
971
+ [[package]]
972
+ name = "pyright"
973
+ version = "1.1.394"
974
+ source = { registry = "https://pypi.org/simple" }
975
+ dependencies = [
976
+ { name = "nodeenv" },
977
+ { name = "typing-extensions" },
978
+ ]
979
+ sdist = { url = "https://files.pythonhosted.org/packages/b1/e4/79f4d8a342eed6790fdebdb500e95062f319ee3d7d75ae27304ff995ae8c/pyright-1.1.394.tar.gz", hash = "sha256:56f2a3ab88c5214a451eb71d8f2792b7700434f841ea219119ade7f42ca93608", size = 3809348 }
980
+ wheels = [
981
+ { url = "https://files.pythonhosted.org/packages/d6/4c/50c74e3d589517a9712a61a26143b587dba6285434a17aebf2ce6b82d2c3/pyright-1.1.394-py3-none-any.whl", hash = "sha256:5f74cce0a795a295fb768759bbeeec62561215dea657edcaab48a932b031ddbb", size = 5679540 },
982
+ ]
983
+
984
  [[package]]
985
  name = "pytest"
986
  version = "8.3.4"
 
1243
  version = "4.67.1"
1244
  source = { registry = "https://pypi.org/simple" }
1245
  dependencies = [
1246
+ { name = "colorama", marker = "platform_system == 'Windows'" },
1247
  ]
1248
  sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
1249
  wheels = [