Spaces:
Running
Running
zach
committed on
Commit
·
d5a40dd
1
Parent(s):
0437219
Update Character dropdown to automatically generate text, simplify instructions content, add info to dropdown
Browse files- src/app.py +87 -63
- src/constants.py +1 -1
src/app.py
CHANGED
@@ -283,8 +283,8 @@ class App:
|
|
283 |
gr.update(interactive=False), # disable Generate Text button
|
284 |
gr.update(interactive=False), # disable Input Text input
|
285 |
gr.update(interactive=False), # disable Synthesize Speech Button
|
286 |
-
gr.update(interactive=False), # disable
|
287 |
-
gr.update(interactive=False), # disable
|
288 |
)
|
289 |
|
290 |
def _enable_ui(self) -> Tuple[
|
@@ -307,8 +307,8 @@ class App:
|
|
307 |
gr.update(interactive=True), # enable Generate Text button
|
308 |
gr.update(interactive=True), # enable Input Text input
|
309 |
gr.update(interactive=True), # enable Synthesize Speech Button
|
310 |
-
gr.update(interactive=True), # enable
|
311 |
-
gr.update(interactive=True), # enable
|
312 |
)
|
313 |
|
314 |
def _reset_voting_ui(self) -> Tuple[
|
@@ -329,14 +329,14 @@ class App:
|
|
329 |
"option_b": {"provider": constants.HUME_AI, "generation_id": None, "audio_file_path": ""},
|
330 |
}
|
331 |
return (
|
332 |
-
gr.update(value=None), # clear audio player A
|
333 |
-
gr.update(value=None, autoplay=False),
|
334 |
-
gr.update(visible=True,
|
335 |
-
gr.update(visible=True,
|
336 |
-
gr.update(visible=False, elem_classes=
|
337 |
-
gr.update(visible=False, elem_classes=
|
338 |
-
default_option_map,
|
339 |
-
False,
|
340 |
)
|
341 |
|
342 |
def _build_heading_section(self) -> Tuple[gr.HTML, gr.Button, gr.HTML]:
|
@@ -352,25 +352,21 @@ class App:
|
|
352 |
<p style="font-size: 16px; font-weight: bold;">
|
353 |
<strong>Instructions</strong>
|
354 |
</p>
|
355 |
-
<ol style="margin-left:
|
356 |
<li>
|
357 |
-
|
358 |
-
|
359 |
</li>
|
360 |
<li>
|
361 |
-
Click the <strong>"
|
362 |
-
|
363 |
</li>
|
364 |
<li>
|
365 |
-
|
366 |
-
description into two synthesized speech options for direct comparison.
|
367 |
</li>
|
368 |
<li>
|
369 |
-
|
370 |
-
|
371 |
-
<li>
|
372 |
-
Click <strong>"Select Option A"</strong> or <strong>"Select Option B"</strong> to vote for
|
373 |
-
the most expressive result.
|
374 |
</li>
|
375 |
</ol>
|
376 |
"""
|
@@ -384,15 +380,16 @@ class App:
|
|
384 |
"""
|
385 |
sample_character_description_dropdown = gr.Dropdown(
|
386 |
choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
|
387 |
-
label="
|
|
|
388 |
value=None,
|
389 |
interactive=True,
|
390 |
)
|
391 |
with gr.Group():
|
392 |
character_description_input = gr.Textbox(
|
393 |
label="Character Description",
|
394 |
-
placeholder="Enter a character description...",
|
395 |
-
lines=
|
396 |
max_lines=8,
|
397 |
max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
|
398 |
show_copy_button=True,
|
@@ -404,7 +401,7 @@ class App:
|
|
404 |
placeholder="Enter or generate text for synthesis...",
|
405 |
interactive=True,
|
406 |
autoscroll=False,
|
407 |
-
lines=
|
408 |
max_lines=8,
|
409 |
max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
|
410 |
show_copy_button=True,
|
@@ -472,10 +469,10 @@ class App:
|
|
472 |
"""
|
473 |
with gr.Blocks(
|
474 |
title="Expressive TTS Arena",
|
475 |
-
fill_width=True,
|
476 |
css_paths="src/assets/styles.css",
|
477 |
) as demo:
|
478 |
# --- UI components ---
|
|
|
479 |
(
|
480 |
title,
|
481 |
randomize_all_button,
|
@@ -498,6 +495,7 @@ class App:
|
|
498 |
) = self._build_output_section()
|
499 |
|
500 |
# --- UI state components ---
|
|
|
501 |
# Track character description used for text and voice generation
|
502 |
character_description_state = gr.State("")
|
503 |
# Track text used for speech synthesis
|
@@ -512,17 +510,15 @@ class App:
|
|
512 |
vote_submitted_state = gr.State(False)
|
513 |
|
514 |
# --- Register event handlers ---
|
515 |
-
|
516 |
-
#
|
517 |
-
#
|
518 |
-
#
|
519 |
-
#
|
520 |
-
#
|
|
|
|
|
521 |
randomize_all_button.click(
|
522 |
-
fn=self._randomize_character_description,
|
523 |
-
inputs=[],
|
524 |
-
outputs=[sample_character_description_dropdown, character_description_input],
|
525 |
-
).then(
|
526 |
fn=self._disable_ui,
|
527 |
inputs=[],
|
528 |
outputs=[
|
@@ -535,10 +531,6 @@ class App:
|
|
535 |
vote_button_a,
|
536 |
vote_button_b,
|
537 |
],
|
538 |
-
).then(
|
539 |
-
fn=self._generate_text,
|
540 |
-
inputs=[character_description_input],
|
541 |
-
outputs=[text_input, generated_text_state],
|
542 |
).then(
|
543 |
fn=self._reset_voting_ui,
|
544 |
inputs=[],
|
@@ -553,9 +545,13 @@ class App:
|
|
553 |
vote_submitted_state,
|
554 |
],
|
555 |
).then(
|
556 |
-
fn=
|
557 |
inputs=[],
|
558 |
-
outputs=[
|
|
|
|
|
|
|
|
|
559 |
).then(
|
560 |
fn=self._synthesize_speech,
|
561 |
inputs=[character_description_input, text_input, generated_text_state],
|
@@ -582,17 +578,51 @@ class App:
|
|
582 |
],
|
583 |
)
|
584 |
|
585 |
-
#
|
586 |
-
|
|
|
|
|
|
|
|
|
587 |
fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
|
588 |
inputs=[sample_character_description_dropdown],
|
589 |
outputs=[character_description_input],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
590 |
)
|
591 |
|
592 |
-
# Generate
|
593 |
-
# 1. Disable
|
594 |
# 2. Generate text
|
595 |
-
# 3. Enable
|
596 |
generate_text_button.click(
|
597 |
fn=self._disable_ui,
|
598 |
inputs=[],
|
@@ -625,11 +655,11 @@ class App:
|
|
625 |
],
|
626 |
)
|
627 |
|
628 |
-
# Synthesize
|
629 |
-
# 1. Disable UI
|
630 |
# 2. Reset UI state for audio players and voting results
|
631 |
# 3. Synthesize speech, load audio players, and display vote button
|
632 |
-
# 4. Enable
|
633 |
synthesize_speech_button.click(
|
634 |
fn=self._disable_ui,
|
635 |
inputs=[],
|
@@ -682,7 +712,7 @@ class App:
|
|
682 |
],
|
683 |
)
|
684 |
|
685 |
-
#
|
686 |
vote_button_a.click(
|
687 |
fn=lambda _=None: (gr.update(interactive=False), gr.update(interactive=False)),
|
688 |
inputs=[],
|
@@ -707,7 +737,7 @@ class App:
|
|
707 |
],
|
708 |
)
|
709 |
|
710 |
-
#
|
711 |
vote_button_b.click(
|
712 |
fn=lambda _=None: (gr.update(interactive=False), gr.update(interactive=False)),
|
713 |
inputs=[],
|
@@ -732,8 +762,9 @@ class App:
|
|
732 |
],
|
733 |
)
|
734 |
|
735 |
-
#
|
736 |
option_a_audio_player.stop(
|
|
|
737 |
fn=lambda option_map: gr.update(
|
738 |
value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
|
739 |
autoplay=True,
|
@@ -742,12 +773,5 @@ class App:
|
|
742 |
outputs=[option_b_audio_player],
|
743 |
)
|
744 |
|
745 |
-
# Enable voting after second audio option playback finishes
|
746 |
-
option_b_audio_player.stop(
|
747 |
-
fn=lambda _=None: gr.update(autoplay=False),
|
748 |
-
inputs=[],
|
749 |
-
outputs=[option_b_audio_player],
|
750 |
-
)
|
751 |
-
|
752 |
logger.debug("Gradio interface built successfully")
|
753 |
return demo
|
|
|
283 |
gr.update(interactive=False), # disable Generate Text button
|
284 |
gr.update(interactive=False), # disable Input Text input
|
285 |
gr.update(interactive=False), # disable Synthesize Speech Button
|
286 |
+
gr.update(interactive=False), # disable Select A Button
|
287 |
+
gr.update(interactive=False), # disable Select B Button
|
288 |
)
|
289 |
|
290 |
def _enable_ui(self) -> Tuple[
|
|
|
307 |
gr.update(interactive=True), # enable Generate Text button
|
308 |
gr.update(interactive=True), # enable Input Text input
|
309 |
gr.update(interactive=True), # enable Synthesize Speech Button
|
310 |
+
gr.update(interactive=True), # enable Select A Button
|
311 |
+
gr.update(interactive=True), # enable Select B Button
|
312 |
)
|
313 |
|
314 |
def _reset_voting_ui(self) -> Tuple[
|
|
|
329 |
"option_b": {"provider": constants.HUME_AI, "generation_id": None, "audio_file_path": ""},
|
330 |
}
|
331 |
return (
|
332 |
+
gr.update(value=None), # clear audio for audio player A
|
333 |
+
gr.update(value=None, autoplay=False), # clear audio and disable autoplay for audio player B
|
334 |
+
gr.update(visible=True), # show vote button A
|
335 |
+
gr.update(visible=True), # show vote button B
|
336 |
+
gr.update(visible=False, elem_classes=[]), # hide vote result A and clear custom styling
|
337 |
+
gr.update(visible=False, elem_classes=[]), # hide vote result B and clear custom styling
|
338 |
+
default_option_map, # Reset option_map_state as a default OptionMap
|
339 |
+
False, # Reset vote_submitted_state
|
340 |
)
|
341 |
|
342 |
def _build_heading_section(self) -> Tuple[gr.HTML, gr.Button, gr.HTML]:
|
|
|
352 |
<p style="font-size: 16px; font-weight: bold;">
|
353 |
<strong>Instructions</strong>
|
354 |
</p>
|
355 |
+
<ol style="margin-left: 12px;">
|
356 |
<li>
|
357 |
+
Select a sample character, or input a custom character description and click
|
358 |
+
<strong>"Generate Text"</strong>, to generate your text input.
|
359 |
</li>
|
360 |
<li>
|
361 |
+
Click the <strong>"Synthesize Speech"</strong> button to synthesize two TTS outputs based on
|
362 |
+
your text and character description.
|
363 |
</li>
|
364 |
<li>
|
365 |
+
Listen to both audio samples to compare their expressiveness.
|
|
|
366 |
</li>
|
367 |
<li>
|
368 |
+
Vote for the most expressive result by clicking either <strong>"Select Option A"</strong> or
|
369 |
+
<strong>"Select Option B"</strong>.
|
|
|
|
|
|
|
370 |
</li>
|
371 |
</ol>
|
372 |
"""
|
|
|
380 |
"""
|
381 |
sample_character_description_dropdown = gr.Dropdown(
|
382 |
choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
|
383 |
+
label="Sample Characters",
|
384 |
+
info="Generate text with a sample character description.",
|
385 |
value=None,
|
386 |
interactive=True,
|
387 |
)
|
388 |
with gr.Group():
|
389 |
character_description_input = gr.Textbox(
|
390 |
label="Character Description",
|
391 |
+
placeholder="Enter a custom character description...",
|
392 |
+
lines=2,
|
393 |
max_lines=8,
|
394 |
max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
|
395 |
show_copy_button=True,
|
|
|
401 |
placeholder="Enter or generate text for synthesis...",
|
402 |
interactive=True,
|
403 |
autoscroll=False,
|
404 |
+
lines=2,
|
405 |
max_lines=8,
|
406 |
max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
|
407 |
show_copy_button=True,
|
|
|
469 |
"""
|
470 |
with gr.Blocks(
|
471 |
title="Expressive TTS Arena",
|
|
|
472 |
css_paths="src/assets/styles.css",
|
473 |
) as demo:
|
474 |
# --- UI components ---
|
475 |
+
|
476 |
(
|
477 |
title,
|
478 |
randomize_all_button,
|
|
|
495 |
) = self._build_output_section()
|
496 |
|
497 |
# --- UI state components ---
|
498 |
+
|
499 |
# Track character description used for text and voice generation
|
500 |
character_description_state = gr.State("")
|
501 |
# Track text used for speech synthesis
|
|
|
510 |
vote_submitted_state = gr.State(False)
|
511 |
|
512 |
# --- Register event handlers ---
|
513 |
+
|
514 |
+
# "Randomize All" button click event handler chain
|
515 |
+
# 1. Disable interactive UI components
|
516 |
+
# 2. Reset UI state for audio players and voting results
|
517 |
+
# 3. Select random sample character description
|
518 |
+
# 4. Generate text
|
519 |
+
# 5. Synthesize speech
|
520 |
+
# 6. Enable interactive UI components
|
521 |
randomize_all_button.click(
|
|
|
|
|
|
|
|
|
522 |
fn=self._disable_ui,
|
523 |
inputs=[],
|
524 |
outputs=[
|
|
|
531 |
vote_button_a,
|
532 |
vote_button_b,
|
533 |
],
|
|
|
|
|
|
|
|
|
534 |
).then(
|
535 |
fn=self._reset_voting_ui,
|
536 |
inputs=[],
|
|
|
545 |
vote_submitted_state,
|
546 |
],
|
547 |
).then(
|
548 |
+
fn=self._randomize_character_description,
|
549 |
inputs=[],
|
550 |
+
outputs=[sample_character_description_dropdown, character_description_input],
|
551 |
+
).then(
|
552 |
+
fn=self._generate_text,
|
553 |
+
inputs=[character_description_input],
|
554 |
+
outputs=[text_input, generated_text_state],
|
555 |
).then(
|
556 |
fn=self._synthesize_speech,
|
557 |
inputs=[character_description_input, text_input, generated_text_state],
|
|
|
578 |
],
|
579 |
)
|
580 |
|
581 |
+
# "Sample Characters" dropdown select event handler chain:
|
582 |
+
# 1. Update Character Description field with sample
|
583 |
+
# 2. Disable interactive UI components
|
584 |
+
# 3. Generate text
|
585 |
+
# 4. Enable interactive UI components
|
586 |
+
sample_character_description_dropdown.select(
|
587 |
fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
|
588 |
inputs=[sample_character_description_dropdown],
|
589 |
outputs=[character_description_input],
|
590 |
+
).then(
|
591 |
+
fn=self._disable_ui,
|
592 |
+
inputs=[],
|
593 |
+
outputs=[
|
594 |
+
randomize_all_button,
|
595 |
+
sample_character_description_dropdown,
|
596 |
+
character_description_input,
|
597 |
+
generate_text_button,
|
598 |
+
text_input,
|
599 |
+
synthesize_speech_button,
|
600 |
+
vote_button_a,
|
601 |
+
vote_button_b,
|
602 |
+
],
|
603 |
+
).then(
|
604 |
+
fn=self._generate_text,
|
605 |
+
inputs=[character_description_input],
|
606 |
+
outputs=[text_input, generated_text_state],
|
607 |
+
).then(
|
608 |
+
fn=self._enable_ui,
|
609 |
+
inputs=[],
|
610 |
+
outputs=[
|
611 |
+
randomize_all_button,
|
612 |
+
sample_character_description_dropdown,
|
613 |
+
character_description_input,
|
614 |
+
generate_text_button,
|
615 |
+
text_input,
|
616 |
+
synthesize_speech_button,
|
617 |
+
vote_button_a,
|
618 |
+
vote_button_b,
|
619 |
+
],
|
620 |
)
|
621 |
|
622 |
+
# "Generate Text" button click event handler chain:
|
623 |
+
# 1. Disable interactive UI components
|
624 |
# 2. Generate text
|
625 |
+
# 3. Enable interactive UI components
|
626 |
generate_text_button.click(
|
627 |
fn=self._disable_ui,
|
628 |
inputs=[],
|
|
|
655 |
],
|
656 |
)
|
657 |
|
658 |
+
# "Synthesize Speech" button click event handler chain:
|
659 |
+
# 1. Disable components in the UI
|
660 |
# 2. Reset UI state for audio players and voting results
|
661 |
# 3. Synthesize speech, load audio players, and display vote button
|
662 |
+
# 4. Enable interactive components in the UI
|
663 |
synthesize_speech_button.click(
|
664 |
fn=self._disable_ui,
|
665 |
inputs=[],
|
|
|
712 |
],
|
713 |
)
|
714 |
|
715 |
+
# "Select Option A" button click event handler chain:
|
716 |
vote_button_a.click(
|
717 |
fn=lambda _=None: (gr.update(interactive=False), gr.update(interactive=False)),
|
718 |
inputs=[],
|
|
|
737 |
],
|
738 |
)
|
739 |
|
740 |
+
# "Select Option B" button click event handler chain:
|
741 |
vote_button_b.click(
|
742 |
fn=lambda _=None: (gr.update(interactive=False), gr.update(interactive=False)),
|
743 |
inputs=[],
|
|
|
762 |
],
|
763 |
)
|
764 |
|
765 |
+
# Audio Player A stop event handler
|
766 |
option_a_audio_player.stop(
|
767 |
+
# Workaround to play both audio samples back-to-back
|
768 |
fn=lambda option_map: gr.update(
|
769 |
value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
|
770 |
autoplay=True,
|
|
|
773 |
outputs=[option_b_audio_player],
|
774 |
)
|
775 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
776 |
logger.debug("Gradio interface built successfully")
|
777 |
return demo
|
src/constants.py
CHANGED
@@ -67,7 +67,7 @@ SAMPLE_CHARACTER_DESCRIPTIONS: dict = {
|
|
67 |
"rising inflections at sentence ends and bursts into spontaneous laughter when excited."
|
68 |
),
|
69 |
"👑 Obnoxious Prince": (
|
70 |
-
"Speaker is a prince of England speaks in a smug and authoritative voice in an obnoxious, proper English "
|
71 |
"accent. He is insecure, arrogant, and prone to tantrums."
|
72 |
),
|
73 |
"🏰 Medieval Peasant Man": (
|
|
|
67 |
"rising inflections at sentence ends and bursts into spontaneous laughter when excited."
|
68 |
),
|
69 |
"👑 Obnoxious Prince": (
|
70 |
+
"Speaker is a prince of England who speaks in a smug and authoritative voice in an obnoxious, proper English "
|
71 |
"accent. He is insecure, arrogant, and prone to tantrums."
|
72 |
),
|
73 |
"🏰 Medieval Peasant Man": (
|