zach committed on
Commit
d5a40dd
·
1 Parent(s): 0437219

Update Character dropdown to automatically generate text, simplify instructions content, add info to dropdown

Browse files
Files changed (2) hide show
  1. src/app.py +87 -63
  2. src/constants.py +1 -1
src/app.py CHANGED
@@ -283,8 +283,8 @@ class App:
283
  gr.update(interactive=False), # disable Generate Text button
284
  gr.update(interactive=False), # disable Input Text input
285
  gr.update(interactive=False), # disable Synthesize Speech Button
286
- gr.update(interactive=False), # disable Choose Select A Button
287
- gr.update(interactive=False), # disable Choose Select B Button
288
  )
289
 
290
  def _enable_ui(self) -> Tuple[
@@ -307,8 +307,8 @@ class App:
307
  gr.update(interactive=True), # enable Generate Text button
308
  gr.update(interactive=True), # enable Input Text input
309
  gr.update(interactive=True), # enable Synthesize Speech Button
310
- gr.update(interactive=True), # enable Choose Select A Button
311
- gr.update(interactive=True), # enable Choose Select B Button
312
  )
313
 
314
  def _reset_voting_ui(self) -> Tuple[
@@ -329,14 +329,14 @@ class App:
329
  "option_b": {"provider": constants.HUME_AI, "generation_id": None, "audio_file_path": ""},
330
  }
331
  return (
332
- gr.update(value=None), # clear audio player A
333
- gr.update(value=None, autoplay=False), # clear audio player B
334
- gr.update(visible=True, interactive=False), # show vote button A
335
- gr.update(visible=True, interactive=False), # show vote button B
336
- gr.update(visible=False, elem_classes=None), # hide vote result A and clear custom styling
337
- gr.update(visible=False, elem_classes=None), # hide vote result B and clear custom styling
338
- default_option_map, # Reset option_map_state as a default OptionMap
339
- False, # Reset vote_submitted_state
340
  )
341
 
342
  def _build_heading_section(self) -> Tuple[gr.HTML, gr.Button, gr.HTML]:
@@ -352,25 +352,21 @@ class App:
352
  <p style="font-size: 16px; font-weight: bold;">
353
  <strong>Instructions</strong>
354
  </p>
355
- <ol style="margin-left: 8px;">
356
  <li>
357
- Choose or enter a character description by selecting a sample or typing your own to guide
358
- text generation and voice synthesis.
359
  </li>
360
  <li>
361
- Click the <strong>"Generate Text"</strong> button to create dialogue for the character;
362
- the text automatically populates the input field for further editing.
363
  </li>
364
  <li>
365
- Click the <strong>"Synthesize Speech"</strong> button to convert your text and character
366
- description into two synthesized speech options for direct comparison.
367
  </li>
368
  <li>
369
- Listen to both audio outputs to assess their expressiveness.
370
- </li>
371
- <li>
372
- Click <strong>"Select Option A"</strong> or <strong>"Select Option B"</strong> to vote for
373
- the most expressive result.
374
  </li>
375
  </ol>
376
  """
@@ -384,15 +380,16 @@ class App:
384
  """
385
  sample_character_description_dropdown = gr.Dropdown(
386
  choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
387
- label="Choose a sample character description",
 
388
  value=None,
389
  interactive=True,
390
  )
391
  with gr.Group():
392
  character_description_input = gr.Textbox(
393
  label="Character Description",
394
- placeholder="Enter a character description...",
395
- lines=3,
396
  max_lines=8,
397
  max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
398
  show_copy_button=True,
@@ -404,7 +401,7 @@ class App:
404
  placeholder="Enter or generate text for synthesis...",
405
  interactive=True,
406
  autoscroll=False,
407
- lines=3,
408
  max_lines=8,
409
  max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
410
  show_copy_button=True,
@@ -472,10 +469,10 @@ class App:
472
  """
473
  with gr.Blocks(
474
  title="Expressive TTS Arena",
475
- fill_width=True,
476
  css_paths="src/assets/styles.css",
477
  ) as demo:
478
  # --- UI components ---
 
479
  (
480
  title,
481
  randomize_all_button,
@@ -498,6 +495,7 @@ class App:
498
  ) = self._build_output_section()
499
 
500
  # --- UI state components ---
 
501
  # Track character description used for text and voice generation
502
  character_description_state = gr.State("")
503
  # Track text used for speech synthesis
@@ -512,17 +510,15 @@ class App:
512
  vote_submitted_state = gr.State(False)
513
 
514
  # --- Register event handlers ---
515
- # Select a character description, generate text, and synthesize speech with a single button click.
516
- # 1. Select random character descriptions and disable buttons
517
- # 2. Generate text
518
- # 3. Reset UI state for audio players and voting results
519
- # 4. Synthesize speech
520
- # 5. Reenable buttons
 
 
521
  randomize_all_button.click(
522
- fn=self._randomize_character_description,
523
- inputs=[],
524
- outputs=[sample_character_description_dropdown, character_description_input],
525
- ).then(
526
  fn=self._disable_ui,
527
  inputs=[],
528
  outputs=[
@@ -535,10 +531,6 @@ class App:
535
  vote_button_a,
536
  vote_button_b,
537
  ],
538
- ).then(
539
- fn=self._generate_text,
540
- inputs=[character_description_input],
541
- outputs=[text_input, generated_text_state],
542
  ).then(
543
  fn=self._reset_voting_ui,
544
  inputs=[],
@@ -553,9 +545,13 @@ class App:
553
  vote_submitted_state,
554
  ],
555
  ).then(
556
- fn=lambda _=None: gr.update(interactive=False),
557
  inputs=[],
558
- outputs=[text_input],
 
 
 
 
559
  ).then(
560
  fn=self._synthesize_speech,
561
  inputs=[character_description_input, text_input, generated_text_state],
@@ -582,17 +578,51 @@ class App:
582
  ],
583
  )
584
 
585
- # When a sample character description is chosen, update the character description textbox
586
- sample_character_description_dropdown.change(
 
 
 
 
587
  fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
588
  inputs=[sample_character_description_dropdown],
589
  outputs=[character_description_input],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  )
591
 
592
- # Generate text button click handler chain:
593
- # 1. Disable the "Generate text" button
594
  # 2. Generate text
595
- # 3. Enable the "Generate text" button
596
  generate_text_button.click(
597
  fn=self._disable_ui,
598
  inputs=[],
@@ -625,11 +655,11 @@ class App:
625
  ],
626
  )
627
 
628
- # Synthesize speech button click event handler chain:
629
- # 1. Disable UI
630
  # 2. Reset UI state for audio players and voting results
631
  # 3. Synthesize speech, load audio players, and display vote button
632
- # 4. Enable the "Synthesize speech" button and display vote buttons
633
  synthesize_speech_button.click(
634
  fn=self._disable_ui,
635
  inputs=[],
@@ -682,7 +712,7 @@ class App:
682
  ],
683
  )
684
 
685
- # Handle Option A vote
686
  vote_button_a.click(
687
  fn=lambda _=None: (gr.update(interactive=False), gr.update(interactive=False)),
688
  inputs=[],
@@ -707,7 +737,7 @@ class App:
707
  ],
708
  )
709
 
710
- # Handle Option B vote
711
  vote_button_b.click(
712
  fn=lambda _=None: (gr.update(interactive=False), gr.update(interactive=False)),
713
  inputs=[],
@@ -732,8 +762,9 @@ class App:
732
  ],
733
  )
734
 
735
- # Reload audio player B with audio and set autoplay to True (workaround to play audio back-to-back)
736
  option_a_audio_player.stop(
 
737
  fn=lambda option_map: gr.update(
738
  value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
739
  autoplay=True,
@@ -742,12 +773,5 @@ class App:
742
  outputs=[option_b_audio_player],
743
  )
744
 
745
- # Enable voting after second audio option playback finishes
746
- option_b_audio_player.stop(
747
- fn=lambda _=None: gr.update(autoplay=False),
748
- inputs=[],
749
- outputs=[option_b_audio_player],
750
- )
751
-
752
  logger.debug("Gradio interface built successfully")
753
  return demo
 
283
  gr.update(interactive=False), # disable Generate Text button
284
  gr.update(interactive=False), # disable Input Text input
285
  gr.update(interactive=False), # disable Synthesize Speech Button
286
+ gr.update(interactive=False), # disable Select A Button
287
+ gr.update(interactive=False), # disable Select B Button
288
  )
289
 
290
  def _enable_ui(self) -> Tuple[
 
307
  gr.update(interactive=True), # enable Generate Text button
308
  gr.update(interactive=True), # enable Input Text input
309
  gr.update(interactive=True), # enable Synthesize Speech Button
310
+ gr.update(interactive=True), # enable Select A Button
311
+ gr.update(interactive=True), # enable Select B Button
312
  )
313
 
314
  def _reset_voting_ui(self) -> Tuple[
 
329
  "option_b": {"provider": constants.HUME_AI, "generation_id": None, "audio_file_path": ""},
330
  }
331
  return (
332
+ gr.update(value=None), # clear audio for audio player A
333
+ gr.update(value=None, autoplay=False), # clear audio and disable autoplay for audio player B
334
+ gr.update(visible=True), # show vote button A
335
+ gr.update(visible=True), # show vote button B
336
+ gr.update(visible=False, elem_classes=[]), # hide vote result A and clear custom styling
337
+ gr.update(visible=False, elem_classes=[]), # hide vote result B and clear custom styling
338
+ default_option_map, # Reset option_map_state as a default OptionMap
339
+ False, # Reset vote_submitted_state
340
  )
341
 
342
  def _build_heading_section(self) -> Tuple[gr.HTML, gr.Button, gr.HTML]:
 
352
  <p style="font-size: 16px; font-weight: bold;">
353
  <strong>Instructions</strong>
354
  </p>
355
+ <ol style="margin-left: 12px;">
356
  <li>
357
+ Select a sample character, or input a custom character description and click
358
+ <strong>"Generate Text"</strong>, to generate your text input.
359
  </li>
360
  <li>
361
+ Click the <strong>"Synthesize Speech"</strong> button to synthesize two TTS outputs based on
362
+ your text and character description.
363
  </li>
364
  <li>
365
+ Listen to both audio samples to compare their expressiveness.
 
366
  </li>
367
  <li>
368
+ Vote for the most expressive result by clicking either <strong>"Select Option A"</strong> or
369
+ <strong>"Select Option B"</strong>.
 
 
 
370
  </li>
371
  </ol>
372
  """
 
380
  """
381
  sample_character_description_dropdown = gr.Dropdown(
382
  choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
383
+ label="Sample Characters",
384
+ info="Generate text with a sample character description.",
385
  value=None,
386
  interactive=True,
387
  )
388
  with gr.Group():
389
  character_description_input = gr.Textbox(
390
  label="Character Description",
391
+ placeholder="Enter a custom character description...",
392
+ lines=2,
393
  max_lines=8,
394
  max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
395
  show_copy_button=True,
 
401
  placeholder="Enter or generate text for synthesis...",
402
  interactive=True,
403
  autoscroll=False,
404
+ lines=2,
405
  max_lines=8,
406
  max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
407
  show_copy_button=True,
 
469
  """
470
  with gr.Blocks(
471
  title="Expressive TTS Arena",
 
472
  css_paths="src/assets/styles.css",
473
  ) as demo:
474
  # --- UI components ---
475
+
476
  (
477
  title,
478
  randomize_all_button,
 
495
  ) = self._build_output_section()
496
 
497
  # --- UI state components ---
498
+
499
  # Track character description used for text and voice generation
500
  character_description_state = gr.State("")
501
  # Track text used for speech synthesis
 
510
  vote_submitted_state = gr.State(False)
511
 
512
  # --- Register event handlers ---
513
+
514
+ # "Randomize All" button click event handler chain
515
+ # 1. Disable interactive UI components
516
+ # 2. Reset UI state for audio players and voting results
517
+ # 3. Select random sample character description
518
+ # 4. Generate text
519
+ # 5. Synthesize speech
520
+ # 6. Enable interactive UI components
521
  randomize_all_button.click(
 
 
 
 
522
  fn=self._disable_ui,
523
  inputs=[],
524
  outputs=[
 
531
  vote_button_a,
532
  vote_button_b,
533
  ],
 
 
 
 
534
  ).then(
535
  fn=self._reset_voting_ui,
536
  inputs=[],
 
545
  vote_submitted_state,
546
  ],
547
  ).then(
548
+ fn=self._randomize_character_description,
549
  inputs=[],
550
+ outputs=[sample_character_description_dropdown, character_description_input],
551
+ ).then(
552
+ fn=self._generate_text,
553
+ inputs=[character_description_input],
554
+ outputs=[text_input, generated_text_state],
555
  ).then(
556
  fn=self._synthesize_speech,
557
  inputs=[character_description_input, text_input, generated_text_state],
 
578
  ],
579
  )
580
 
581
+ # "Sample Characters" dropdown select event handler chain:
582
+ # 1. Update Character Description field with sample
583
+ # 2. Disable interactive UI components
584
+ # 3. Generate text
585
+ # 4. Enable interactive UI components
586
+ sample_character_description_dropdown.select(
587
  fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
588
  inputs=[sample_character_description_dropdown],
589
  outputs=[character_description_input],
590
+ ).then(
591
+ fn=self._disable_ui,
592
+ inputs=[],
593
+ outputs=[
594
+ randomize_all_button,
595
+ sample_character_description_dropdown,
596
+ character_description_input,
597
+ generate_text_button,
598
+ text_input,
599
+ synthesize_speech_button,
600
+ vote_button_a,
601
+ vote_button_b,
602
+ ],
603
+ ).then(
604
+ fn=self._generate_text,
605
+ inputs=[character_description_input],
606
+ outputs=[text_input, generated_text_state],
607
+ ).then(
608
+ fn=self._enable_ui,
609
+ inputs=[],
610
+ outputs=[
611
+ randomize_all_button,
612
+ sample_character_description_dropdown,
613
+ character_description_input,
614
+ generate_text_button,
615
+ text_input,
616
+ synthesize_speech_button,
617
+ vote_button_a,
618
+ vote_button_b,
619
+ ],
620
  )
621
 
622
+ # "Generate Text" button click event handler chain:
623
+ # 1. Disable interactive UI components
624
  # 2. Generate text
625
+ # 3. Enable interactive UI components
626
  generate_text_button.click(
627
  fn=self._disable_ui,
628
  inputs=[],
 
655
  ],
656
  )
657
 
658
+ # "Synthesize Speech" button click event handler chain:
659
+ # 1. Disable components in the UI
660
  # 2. Reset UI state for audio players and voting results
661
  # 3. Synthesize speech, load audio players, and display vote button
662
+ # 4. Enable interactive components in the UI
663
  synthesize_speech_button.click(
664
  fn=self._disable_ui,
665
  inputs=[],
 
712
  ],
713
  )
714
 
715
+ # "Select Option A" button click event handler chain:
716
  vote_button_a.click(
717
  fn=lambda _=None: (gr.update(interactive=False), gr.update(interactive=False)),
718
  inputs=[],
 
737
  ],
738
  )
739
 
740
+ # "Select Option B" button click event handler chain:
741
  vote_button_b.click(
742
  fn=lambda _=None: (gr.update(interactive=False), gr.update(interactive=False)),
743
  inputs=[],
 
762
  ],
763
  )
764
 
765
+ # Audio Player A stop event handler
766
  option_a_audio_player.stop(
767
+ # Workaround to play both audio samples back-to-back
768
  fn=lambda option_map: gr.update(
769
  value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
770
  autoplay=True,
 
773
  outputs=[option_b_audio_player],
774
  )
775
 
 
 
 
 
 
 
 
776
  logger.debug("Gradio interface built successfully")
777
  return demo
src/constants.py CHANGED
@@ -67,7 +67,7 @@ SAMPLE_CHARACTER_DESCRIPTIONS: dict = {
67
  "rising inflections at sentence ends and bursts into spontaneous laughter when excited."
68
  ),
69
  "👑 Obnoxious Prince": (
70
- "Speaker is a prince of England speaks in a smug and authoritative voice in an obnoxious, proper English "
71
  "accent. He is insecure, arrogant, and prone to tantrums."
72
  ),
73
  "🏰 Medieval Peasant Man": (
 
67
  "rising inflections at sentence ends and bursts into spontaneous laughter when excited."
68
  ),
69
  "👑 Obnoxious Prince": (
70
+ "Speaker is a prince of England who speaks in a smug and authoritative voice in an obnoxious, proper English "
71
  "accent. He is insecure, arrogant, and prone to tantrums."
72
  ),
73
  "🏰 Medieval Peasant Man": (