Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Files Community

twitchard committed on Feb 13

Commit

1ed6720

unverified ·

1 Parent(s): cb57d96

more explicit control flow

Browse files

Files changed (14) hide show

README.md +2 -2
pyproject.toml +2 -1
src/app.py +472 -482
src/config.py +60 -53
src/database/__init__.py +4 -3
src/database/database.py +51 -45
src/database/models.py +1 -0
src/integrations/__init__.py +7 -4
src/integrations/anthropic_api.py +9 -21
src/integrations/elevenlabs_api.py +8 -17
src/integrations/hume_api.py +9 -12
src/main.py +18 -0
src/utils.py +68 -37
uv.lock +17 -2

README.md CHANGED Viewed

@@ -76,12 +76,12 @@ Expressive TTS Arena/
     Standard
     ```sh
-    uv run python -m src.app
     ```
     With hot-reloading
     ```sh
-    uv run watchfiles "python -m src.app" src
     ```
 4. Test the application by navigating to the localhost URL in your browser (e.g. `localhost:7860` or `http://127.0.0.1:7860`)

     Standard
     ```sh
+    uv run python -m src.main
     ```
     With hot-reloading
     ```sh
+    uv run watchfiles "python -m src.main" src
     ```
 4. Test the application by navigating to the localhost URL in your browser (e.g. `localhost:7860` or `http://127.0.0.1:7860`)

pyproject.toml CHANGED Viewed

@@ -19,6 +19,7 @@ dependencies = [
 dev-dependencies = [
     "mypy>=1.15.0",
     "pre-commit>=4.1.0",
     "pytest>=8.3.4",
     "ruff>=0.9.5",
     "watchfiles>=1.0.4",
@@ -79,4 +80,4 @@ select = [
 max-line-length = 120
 [tool.ruff.lint.pydocstyle]
-convention = "google"

 dev-dependencies = [
     "mypy>=1.15.0",
     "pre-commit>=4.1.0",
+    "pyright>=1.1.394",
     "pytest>=8.3.4",
     "ruff>=0.9.5",
     "watchfiles>=1.0.4",
 max-line-length = 120
 [tool.ruff.lint.pydocstyle]
+convention = "google"

src/app.py CHANGED Viewed

@@ -18,8 +18,9 @@ import gradio as gr
 # Local Application Imports
 from src import constants
-from src.config import AUDIO_DIR, logger
 from src.custom_types import ComparisonType, Option, OptionMap
 from src.integrations import (
     AnthropicError,
     ElevenLabsError,
@@ -38,510 +39,499 @@ from src.utils import (
 )
-def generate_text(
-    character_description: str,
-) -> Tuple[Union[str, gr.update], gr.update]:
-    """
-    Validates the character_description and generates text using Anthropic API.
-    Args:
-        character_description (str): The user-provided text for character description.
-    Returns:
-        Tuple containing:
-          - The generated text (as a gr.update).
-          - An update for the generated text state.
-    Raises:
-        gr.Error: On validation or API errors.
-    """
-    try:
-        validate_character_description_length(character_description)
-    except ValueError as ve:
-        logger.warning(f"Validation error: {ve}")
-        raise gr.Error(str(ve))
-    try:
-        generated_text = generate_text_with_claude(character_description)
-        logger.info(f"Generated text ({len(generated_text)} characters).")
-        return gr.update(value=generated_text), generated_text
-    except AnthropicError as ae:
-        logger.error(f"AnthropicError while generating text: {ae!s}")
-        raise gr.Error(
-            f'There was an issue communicating with the Anthropic API: "{ae.message}"'
-        )
-    except Exception as e:
-        logger.error(f"Unexpected error while generating text: {e}")
-        raise gr.Error("Failed to generate text. Please try again later.")
-def synthesize_speech(
-    character_description: str,
-    text: str,
-    generated_text_state: str,
-) -> Tuple[gr.update, gr.update, dict, str, ComparisonType, str, str, bool, str, str]:
-    """
-    Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
-    This function generates TTS outputs using different providers based on the input text and its modification
-    state. Depending on the selected providers, it may:
-      - Synthesize one Hume and one ElevenLabs output (50% chance), or
-      - Synthesize two Hume outputs (50% chance).
-    The outputs are processed and shuffled, and the corresponding UI components for two audio players are updated.
-    Additional metadata such as the generation IDs, comparison type, and state information are also returned.
-    Args:
-        character_description (str): The description of the character used for generating the voice.
-        text (str): The text content to be synthesized into speech.
-        generated_text_state (str): The previously generated text state, used to determine if the text has
-                                    been modified.
-    Returns:
-        Tuple containing:
-            - gr.update: Update for the first audio player (with autoplay enabled).
-            - gr.update: Update for the second audio player.
-            - dict: A mapping of option constants to their corresponding TTS providers.
-            - str: The raw audio value (relative file path) for option B.
-            - ComparisonType: The comparison type between the selected TTS providers.
-            - str: Generation ID for option A.
-            - str: Generation ID for option B.
-            - bool: Flag indicating whether the text was modified.
-            - str: The original text that was synthesized.
-            - str: The original character description.
-    Raises:
-        gr.Error: If any API or unexpected errors occur during the TTS synthesis process.
-    """
-    if not text:
-        logger.warning("Skipping text-to-speech due to empty text.")
-        raise gr.Error("Please generate or enter text to synthesize.")
-    # Select 2 TTS providers based on whether the text has been modified.
-    text_modified = text != generated_text_state
-    provider_a, provider_b = choose_providers(
-        text_modified, character_description
-    )
-    try:
-        if provider_b == constants.HUME_AI:
-            # If generating 2 Hume outputs, do so in a single API call
-            (
-                generation_id_a,
-                audio_a,
-                generation_id_b,
-                audio_b,
-            ) = text_to_speech_with_hume(character_description, text, 2)
-        else:
-            with ThreadPoolExecutor(max_workers=2) as executor:
-                # Generate a single Hume output
-                future_audio_a = executor.submit(
-                    text_to_speech_with_hume, character_description, text
-                )
-                # Generate a second TTS output from the second provider
-                match provider_b:
-                    case constants.ELEVENLABS:
-                        future_audio_b = executor.submit(
-                            text_to_speech_with_elevenlabs, character_description, text
-                        )
-                    case _:
-                        # Additional TTS Providers can be added here
-                        raise ValueError(f"Unsupported provider: {provider_b}")
-                generation_id_a, audio_a = future_audio_a.result()
-                generation_id_b, audio_b = future_audio_b.result()
-        # Shuffle options so that placement of options in the UI will always be random
-        option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
-        option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
-        options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
-        option_a_audio = options_map["option_a"]["audio_file_path"]
-        option_b_audio = options_map["option_b"]["audio_file_path"]
-        return (
-            gr.update(value=option_a_audio, visible=True, autoplay=True),
-            gr.update(value=option_b_audio, visible=True),
-            options_map,
             text_modified,
-            text,
             character_description,
         )
-    except ElevenLabsError as ee:
-        logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
-        raise gr.Error(
-            f'There was an issue communicating with the Elevenlabs API: "{ee.message}"'
         )
-    except HumeError as he:
-        logger.error(f"HumeError while synthesizing speech from text: {he!s}")
-        raise gr.Error(
-            f'There was an issue communicating with the Hume API: "{he.message}"'
         )
-    except Exception as e:
-        logger.error(f"Unexpected error during TTS generation: {e}")
-        raise gr.Error("An unexpected error ocurred. Please try again later.")
-def vote(
-    vote_submitted: bool,
-    option_map: OptionMap,
-    clicked_option_button: str,
-    text_modified: bool,
-    character_description: str,
-    text: str,
-) -> Tuple[bool, gr.update, gr.update, gr.update]:
-    """
-    Handles user voting.
-    Args:
-        vote_submitted (bool): True if a vote was already submitted.
-        option_map (OptionMap): A dictionary mapping option labels to their details.
-            Expected structure:
-            {
-                'Option A': 'Hume AI',
-                'Option B': 'ElevenLabs',
-            }
-        clicked_option_button (str): The button that was clicked.
-    Returns:
-        A tuple of:
-         - A boolean indicating if the vote was accepted.
-         - An update for the selected vote button (showing provider and trophy emoji).
-         - An update for the unselected vote button (showing provider).
-         - An update for enabling vote interactions.
-    """
-    if not option_map or vote_submitted:
-        return gr.skip(), gr.skip(), gr.skip(), gr.skip()
-    selected_option, other_option = determine_selected_option(clicked_option_button)
-    selected_provider = option_map[selected_option]["provider"]
-    other_provider = option_map[other_option]["provider"]
-    # Report voting results to be persisted to results DB
-    submit_voting_results(
-        option_map,
-        selected_option,
-        text_modified,
-        character_description,
-        text,
-    )
-    # Build button text, displaying the provider and voice name, appending the trophy emoji to the selected option.
-    selected_label = f"{selected_provider} {constants.TROPHY_EMOJI}"
-    other_label = f"{other_provider}"
-    return (
-        True,
-        (
-            gr.update(value=selected_label, variant="primary")
-            if selected_option == constants.OPTION_A_KEY
-            else gr.update(value=other_label, variant="secondary")
-        ),
-        (
-            gr.update(value=other_label, variant="secondary")
-            if selected_option == constants.OPTION_A_KEY
-            else gr.update(value=selected_label, variant="primary")
-        ),
-        gr.update(interactive=True),
-    )
-def reset_ui() -> Tuple[gr.update, gr.update, gr.update, gr.update, None, bool]:
-    """
-    Resets UI state before generating new text.
-    Returns:
-        A tuple of updates for:
-         - option_a_audio_player (clear audio)
-         - option_b_audio_player (clear audio)
-         - vote_button_a (disable and reset button text)
-         - vote_button_b (disable and reset button text)
-         - option_map_state (reset option map state)
-         - vote_submitted_state (reset submitted vote state)
-    """
-    return (
-        gr.update(value=None),
-        gr.update(value=None, autoplay=False),
-        gr.update(value=constants.SELECT_OPTION_A, variant="secondary"),
-        gr.update(value=constants.SELECT_OPTION_B, variant="secondary"),
-        None,
-        False,
-    )
-def build_input_section() -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
-    """
         Builds the input section including the sample character description dropdown, character
         description input, and generate text button.
-    """
-    sample_character_description_dropdown = gr.Dropdown(
-        choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
-        label="Choose a sample character description",
-        value=None,
-        interactive=True,
-    )
-    character_description_input = gr.Textbox(
-        label="Character Description",
-        placeholder="Enter a character description...",
-        lines=3,
-        max_lines=8,
-        max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
-        show_copy_button=True,
-    )
-    generate_text_button = gr.Button("Generate Text", variant="secondary")
-    return (
-        sample_character_description_dropdown,
-        character_description_input,
-        generate_text_button,
-    )
-def build_output_section() -> (
-    Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]
-):
-    """
-        Builds the output section including text input, audio players, and vote buttons.
-    """
-    text_input = gr.Textbox(
-        label="Input Text",
-        placeholder="Enter or generate text for synthesis...",
-        interactive=True,
-        autoscroll=False,
-        lines=3,
-        max_lines=8,
-        max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
-        show_copy_button=True,
-    )
-    synthesize_speech_button = gr.Button("Synthesize Speech", variant="primary")
-    with gr.Row(equal_height=True):
-        option_a_audio_player = gr.Audio(
-            label=constants.OPTION_A_LABEL, type="filepath", interactive=False
-        )
-        option_b_audio_player = gr.Audio(
-            label=constants.OPTION_B_LABEL, type="filepath", interactive=False
         )
-    with gr.Row(equal_height=True):
-        vote_button_a = gr.Button(constants.SELECT_OPTION_A, interactive=False)
-        vote_button_b = gr.Button(constants.SELECT_OPTION_B, interactive=False)
-    return (
-        text_input,
-        synthesize_speech_button,
-        option_a_audio_player,
-        option_b_audio_player,
-        vote_button_a,
-        vote_button_b,
-    )
-def build_gradio_interface() -> gr.Blocks:
-    """
-    Builds and configures the Gradio user interface.
-    Returns:
-        gr.Blocks: The fully constructed Gradio UI layout.
-    """
-    custom_theme = CustomTheme()
-    with gr.Blocks(
-        title="Expressive TTS Arena",
-        theme=custom_theme,
-        fill_width=True,
-        css_paths="src/assets/styles.css",
-    ) as demo:
-        # Title & instructions
-        gr.Markdown("# Expressive TTS Arena")
-        gr.Markdown(
-            """
-            1. **Choose or enter a character description**: Select a sample from the list or enter your own to guide
-            text and voice generation.
-            2. **Generate text**: Click **"Generate Text"** to create dialogue based on the character. The generated
-            text will appear in the input field automatically—edit it if needed.
-            3. **Synthesize speech**: Click **"Synthesize Speech"** to send your text and character description to two
-            TTS APIs. Each API generates a voice and synthesizes speech in that voice.
-            4. **Listen & compare**: Play both audio options and assess their expressiveness.
-            5. **Vote for the best**: Click **"Select Option A"** or **"Select Option B"** to choose the most
-            expressive output.
-            """
         )
-        # Build generate text section
-        (
             sample_character_description_dropdown,
             character_description_input,
             generate_text_button,
-        ) = build_input_section()
-        # Build synthesize speech section
-        (
             text_input,
             synthesize_speech_button,
             option_a_audio_player,
             option_b_audio_player,
             vote_button_a,
             vote_button_b,
-        ) = build_output_section()
-        # --- UI state components ---
-        # Track character description used for text and voice generation
-        character_description_state = gr.State("")
-        # Track text used for speech synthesis
-        text_state = gr.State("")
-        # Track generated text state
-        generated_text_state = gr.State("")
-        # Track whether text that was used was generated or modified/custom
-        text_modified_state = gr.State()
-        # Track option map (option A and option B are randomized)
-        option_map_state = gr.State()
-        # Track whether the user has voted for an option
-        vote_submitted_state = gr.State(False)
-        # --- Register event handlers ---
-        # When a sample character description is chosen, update the character description textbox
-        sample_character_description_dropdown.change(
-            fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
-            inputs=[sample_character_description_dropdown],
-            outputs=[character_description_input],
         )
-        # Generate text button click handler chain:
-        # 1. Disable the "Generate text" button
-        # 2. Generate text
-        # 3. Enable the "Generate text" button
-        generate_text_button.click(
-            fn=lambda: gr.update(interactive=False),
-            inputs=[],
-            outputs=[generate_text_button],
-        ).then(
-            fn=generate_text,
-            inputs=[character_description_input],
-            outputs=[text_input, generated_text_state],
-        ).then(
-            fn=lambda: gr.update(interactive=True),
-            inputs=[],
-            outputs=[generate_text_button],
-        )
-        # Synthesize speech button click event handler chain:
-        # 1. Disable the "Synthesize speech" button
-        # 2. Reset UI state
-        # 3. Synthesize speech, load audio players, and display vote button
-        # 4. Enable the "Synthesize speech" button and display vote buttons
-        synthesize_speech_button.click(
-            fn=lambda: (
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-            ),
-            inputs=[],
-            outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
-        ).then(
-            fn=reset_ui,
-            inputs=[],
-            outputs=[
-                option_a_audio_player,
-                option_b_audio_player,
-                vote_button_a,
-                vote_button_b,
-                option_map_state,
-                vote_submitted_state,
-            ],
-        ).then(
-            fn=synthesize_speech,
-            inputs=[character_description_input, text_input, generated_text_state],
-            outputs=[
                 option_a_audio_player,
                 option_b_audio_player,
-                option_map_state,
-                text_modified_state,
-                text_state,
-                character_description_state,
-            ],
-        ).then(
-            fn=lambda: (
-                gr.update(interactive=True),
-                gr.update(interactive=True),
-                gr.update(interactive=True),
-            ),
-            inputs=[],
-            outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
-        )
-        # Vote button click event handlers
-        vote_button_a.click(
-            fn=lambda: (
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-            ),
-            inputs=[],
-            outputs=[vote_button_a, vote_button_b],
-        ).then(
-            fn=vote,
-            inputs=[
-                vote_submitted_state,
-                option_map_state,
-                vote_button_a,
-                text_modified_state,
-                character_description_state,
-                text_state,
-            ],
-            outputs=[
-                vote_submitted_state,
-                vote_button_a,
-                vote_button_b,
-                synthesize_speech_button,
-            ],
-        )
-        vote_button_b.click(
-            fn=lambda: (
-                gr.update(interactive=False),
-                gr.update(interactive=False),
-            ),
-            inputs=[],
-            outputs=[vote_button_a, vote_button_b],
-        ).then(
-            fn=vote,
-            inputs=[
-                vote_submitted_state,
-                option_map_state,
-                vote_button_b,
-                text_modified_state,
-                character_description_state,
-                text_state,
-            ],
-            outputs=[
-                vote_submitted_state,
                 vote_button_a,
                 vote_button_b,
-                synthesize_speech_button,
-            ],
-        )
-        # Reload audio player B with audio and set autoplay to True (workaround to play audio back-to-back)
-        option_a_audio_player.stop(
-            fn=lambda option_map: gr.update(
-                value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
-                autoplay=True,
-            ),
-            inputs=[option_map_state],
-            outputs=[option_b_audio_player],
-        )
-        # Enable voting after second audio option playback finishes
-        option_b_audio_player.stop(
-            fn=lambda _: gr.update(autoplay=False),
-            inputs=[],
-            outputs=[option_b_audio_player],
-        )
-    logger.debug("Gradio interface built successfully")
-    return demo
-if __name__ == "__main__":
-    logger.info("Launching TTS Arena Gradio app...")
-    demo = build_gradio_interface()
-    demo.launch(server_name="0.0.0.0", allowed_paths=[AUDIO_DIR])

 # Local Application Imports
 from src import constants
+from src.config import Config, logger
 from src.custom_types import ComparisonType, Option, OptionMap
+from src.database.database import DBSessionMaker
 from src.integrations import (
     AnthropicError,
     ElevenLabsError,
 )
+class App:
+    config: Config
+    db_session_maker: DBSessionMaker
+    def __init__(self, config: Config, db_session_maker: DBSessionMaker):
+        self.config = config
+        self.db_session_maker = db_session_maker
+    def generate_text(
+        self,
+        character_description: str,
+    ) -> Tuple[Union[str, gr.update], gr.update]:
+        """
+        Validates the character_description and generates text using Anthropic API.
+        Args:
+            character_description (str): The user-provided text for character description.
+        Returns:
+            Tuple containing:
+              - The generated text (as a gr.update).
+              - An update for the generated text state.
+        Raises:
+            gr.Error: On validation or API errors.
+        """
+        try:
+            validate_character_description_length(character_description)
+        except ValueError as ve:
+            logger.warning(f"Validation error: {ve}")
+            raise gr.Error(str(ve))
+        try:
+            generated_text = generate_text_with_claude(character_description, self.config)
+            logger.info(f"Generated text ({len(generated_text)} characters).")
+            return gr.update(value=generated_text), generated_text
+        except AnthropicError as ae:
+            logger.error(f"AnthropicError while generating text: {ae!s}")
+            raise gr.Error(f'There was an issue communicating with the Anthropic API: "{ae.message}"')
+        except Exception as e:
+            logger.error(f"Unexpected error while generating text: {e}")
+            raise gr.Error("Failed to generate text. Please try again later.")
+    def synthesize_speech(
+        self,
+        character_description: str,
+        text: str,
+        generated_text_state: str,
+    ) -> Tuple[gr.update, gr.update, dict, str, ComparisonType, str, str, bool, str, str]:
+        """
+        Synthesizes two text-to-speech outputs, updates UI state components, and returns additional TTS metadata.
+        This function generates TTS outputs using different providers based on the input text and its modification
+        state. Depending on the selected providers, it may:
+          - Synthesize one Hume and one ElevenLabs output (50% chance), or
+          - Synthesize two Hume outputs (50% chance).
+        The outputs are processed and shuffled, and the corresponding UI components for two audio players are updated.
+        Additional metadata such as the generation IDs, comparison type, and state information are also returned.
+        Args:
+            character_description (str): The description of the character used for generating the voice.
+            text (str): The text content to be synthesized into speech.
+            generated_text_state (str): The previously generated text state, used to determine if the text has
+                                        been modified.
+        Returns:
+            Tuple containing:
+                - gr.update: Update for the first audio player (with autoplay enabled).
+                - gr.update: Update for the second audio player.
+                - dict: A mapping of option constants to their corresponding TTS providers.
+                - str: The raw audio value (relative file path) for option B.
+                - ComparisonType: The comparison type between the selected TTS providers.
+                - str: Generation ID for option A.
+                - str: Generation ID for option B.
+                - bool: Flag indicating whether the text was modified.
+                - str: The original text that was synthesized.
+                - str: The original character description.
+        Raises:
+            gr.Error: If any API or unexpected errors occur during the TTS synthesis process.
+        """
+        if not text:
+            logger.warning("Skipping text-to-speech due to empty text.")
+            raise gr.Error("Please generate or enter text to synthesize.")
+        # Select 2 TTS providers based on whether the text has been modified.
+        text_modified = text != generated_text_state
+        provider_a, provider_b = choose_providers(text_modified, character_description)
+        try:
+            if provider_b == constants.HUME_AI:
+                num_generations = 2
+                # If generating 2 Hume outputs, do so in a single API call
+                (
+                    generation_id_a,
+                    audio_a,
+                    generation_id_b,
+                    audio_b,
+                ) = text_to_speech_with_hume(character_description, text, num_generations, self.config)
+            else:
+                with ThreadPoolExecutor(max_workers=2) as executor:
+                    num_generations = 1
+                    # Generate a single Hume output
+                    future_audio_a = executor.submit(
+                        text_to_speech_with_hume, character_description, text, num_generations, self.config
+                    )
+                    # Generate a second TTS output from the second provider
+                    match provider_b:
+                        case constants.ELEVENLABS:
+                            future_audio_b = executor.submit(
+                                text_to_speech_with_elevenlabs, character_description, text, self.config
+                            )
+                        case _:
+                            # Additional TTS Providers can be added here
+                            raise ValueError(f"Unsupported provider: {provider_b}")
+                    generation_id_a, audio_a = future_audio_a.result()
+                    generation_id_b, audio_b = future_audio_b.result()
+            # Shuffle options so that placement of options in the UI will always be random
+            option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
+            option_b = Option(provider=provider_b, audio=audio_b, generation_id=generation_id_b)
+            options_map: OptionMap = create_shuffled_tts_options(option_a, option_b)
+            option_a_audio = options_map["option_a"]["audio_file_path"]
+            option_b_audio = options_map["option_b"]["audio_file_path"]
+            return (
+                gr.update(value=option_a_audio, visible=True, autoplay=True),
+                gr.update(value=option_b_audio, visible=True),
+                options_map,
+                text_modified,
+                text,
+                character_description,
+            )
+        except ElevenLabsError as ee:
+            logger.error(f"ElevenLabsError while synthesizing speech from text: {ee!s}")
+            raise gr.Error(f'There was an issue communicating with the Elevenlabs API: "{ee.message}"')
+        except HumeError as he:
+            logger.error(f"HumeError while synthesizing speech from text: {he!s}")
+            raise gr.Error(f'There was an issue communicating with the Hume API: "{he.message}"')
+        except Exception as e:
+            logger.error(f"Unexpected error during TTS generation: {e}")
+            raise gr.Error("An unexpected error ocurred. Please try again later.")
+    def vote(
+        self,
+        vote_submitted: bool,
+        option_map: OptionMap,
+        clicked_option_button: str,
+        text_modified: bool,
+        character_description: str,
+        text: str,
+    ) -> Tuple[bool, gr.update, gr.update, gr.update]:
+        """
+        Handles user voting.
+        Args:
+            vote_submitted (bool): True if a vote was already submitted.
+            option_map (OptionMap): A dictionary mapping option labels to their details.
+                Expected structure:
+                {
+                    'Option A': 'Hume AI',
+                    'Option B': 'ElevenLabs',
+                }
+            clicked_option_button (str): The button that was clicked.
+        Returns:
+            A tuple of:
+             - A boolean indicating if the vote was accepted.
+             - An update for the selected vote button (showing provider and trophy emoji).
+             - An update for the unselected vote button (showing provider).
+             - An update for enabling vote interactions.
+        """
+        if not option_map or vote_submitted:
+            return gr.skip(), gr.skip(), gr.skip(), gr.skip()
+        selected_option, other_option = determine_selected_option(clicked_option_button)
+        selected_provider = option_map[selected_option]["provider"]
+        other_provider = option_map[other_option]["provider"]
+        # Report voting results to be persisted to results DB
+        submit_voting_results(
+            option_map,
+            selected_option,
             text_modified,
             character_description,
+            text,
+            self.db_session_maker,
+            self.config,
         )
+        # Build button text, displaying the provider and voice name, appending the trophy emoji to the selected option.
+        selected_label = f"{selected_provider} {constants.TROPHY_EMOJI}"
+        other_label = f"{other_provider}"
+        return (
+            True,
+            (
+                gr.update(value=selected_label, variant="primary")
+                if selected_option == constants.OPTION_A_KEY
+                else gr.update(value=other_label, variant="secondary")
+            ),
+            (
+                gr.update(value=other_label, variant="secondary")
+                if selected_option == constants.OPTION_A_KEY
+                else gr.update(value=selected_label, variant="primary")
+            ),
+            gr.update(interactive=True),
         )
+    def reset_ui(self) -> Tuple[gr.update, gr.update, gr.update, gr.update, None, bool]:
+        """
+        Resets UI state before generating new text.
+        Returns:
+            A tuple of updates for:
+             - option_a_audio_player (clear audio)
+             - option_b_audio_player (clear audio)
+             - vote_button_a (disable and reset button text)
+             - vote_button_b (disable and reset button text)
+             - option_map_state (reset option map state)
+             - vote_submitted_state (reset submitted vote state)
+        """
+        return (
+            gr.update(value=None),
+            gr.update(value=None, autoplay=False),
+            gr.update(value=constants.SELECT_OPTION_A, variant="secondary"),
+            gr.update(value=constants.SELECT_OPTION_B, variant="secondary"),
+            None,
+            False,
         )
+    def build_input_section(self) -> Tuple[gr.Dropdown, gr.Textbox, gr.Button]:
+        """
         Builds the input section including the sample character description dropdown, character
         description input, and generate text button.
+        """
+        sample_character_description_dropdown = gr.Dropdown(
+            choices=list(constants.SAMPLE_CHARACTER_DESCRIPTIONS.keys()),
+            label="Choose a sample character description",
+            value=None,
+            interactive=True,
         )
+        character_description_input = gr.Textbox(
+            label="Character Description",
+            placeholder="Enter a character description...",
+            lines=3,
+            max_lines=8,
+            max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
+            show_copy_button=True,
         )
+        generate_text_button = gr.Button("Generate Text", variant="secondary")
+        return (
             sample_character_description_dropdown,
             character_description_input,
             generate_text_button,
+        )
+    def build_output_section(self) -> Tuple[gr.Textbox, gr.Button, gr.Audio, gr.Audio, gr.Button, gr.Button]:
+        """
+        Builds the output section including text input, audio players, and vote buttons.
+        """
+        text_input = gr.Textbox(
+            label="Input Text",
+            placeholder="Enter or generate text for synthesis...",
+            interactive=True,
+            autoscroll=False,
+            lines=3,
+            max_lines=8,
+            max_length=constants.CHARACTER_DESCRIPTION_MAX_LENGTH,
+            show_copy_button=True,
+        )
+        synthesize_speech_button = gr.Button("Synthesize Speech", variant="primary")
+        with gr.Row(equal_height=True):
+            option_a_audio_player = gr.Audio(label=constants.OPTION_A_LABEL, type="filepath", interactive=False)
+            option_b_audio_player = gr.Audio(label=constants.OPTION_B_LABEL, type="filepath", interactive=False)
+        with gr.Row(equal_height=True):
+            vote_button_a = gr.Button(constants.SELECT_OPTION_A, interactive=False)
+            vote_button_b = gr.Button(constants.SELECT_OPTION_B, interactive=False)
+        return (
             text_input,
             synthesize_speech_button,
             option_a_audio_player,
             option_b_audio_player,
             vote_button_a,
             vote_button_b,
         )
+    def build_gradio_interface(self) -> gr.Blocks:
+        """
+        Builds and configures the Gradio user interface.
+        Returns:
+            gr.Blocks: The fully constructed Gradio UI layout.
+        """
+        custom_theme = CustomTheme()
+        with gr.Blocks(
+            title="Expressive TTS Arena",
+            theme=custom_theme,
+            fill_width=True,
+            css_paths="src/assets/styles.css",
+        ) as demo:
+            # Title & instructions
+            gr.Markdown("# Expressive TTS Arena")
+            gr.Markdown(
+                """
+                1. **Choose or enter a character description**: Select a sample from the list or enter your own to guide
+                text and voice generation.
+                2. **Generate text**: Click **"Generate Text"** to create dialogue based on the character. The generated
+                text will appear in the input field automatically—edit it if needed.
+                3. **Synthesize speech**: Click **"Synthesize Speech"** to send your text and character description to
+                two TTS APIs. Each API generates a voice and synthesizes speech in that voice.
+                4. **Listen & compare**: Play both audio options and assess their expressiveness.
+                5. **Vote for the best**: Click **"Select Option A"** or **"Select Option B"** to choose the most
+                expressive output.
+                """
+            )
+            # Build generate text section
+            (
+                sample_character_description_dropdown,
+                character_description_input,
+                generate_text_button,
+            ) = self.build_input_section()
+            # Build synthesize speech section
+            (
+                text_input,
+                synthesize_speech_button,
                 option_a_audio_player,
                 option_b_audio_player,
                 vote_button_a,
                 vote_button_b,
+            ) = self.build_output_section()
+            # --- UI state components ---
+            # Track character description used for text and voice generation
+            character_description_state = gr.State("")
+            # Track text used for speech synthesis
+            text_state = gr.State("")
+            # Track generated text state
+            generated_text_state = gr.State("")
+            # Track whether text that was used was generated or modified/custom
+            text_modified_state = gr.State()
+            # Track option map (option A and option B are randomized)
+            option_map_state = gr.State()
+            # Track whether the user has voted for an option
+            vote_submitted_state = gr.State(False)
+            # --- Register event handlers ---
+            # When a sample character description is chosen, update the character description textbox
+            sample_character_description_dropdown.change(
+                fn=lambda choice: constants.SAMPLE_CHARACTER_DESCRIPTIONS.get(choice, ""),
+                inputs=[sample_character_description_dropdown],
+                outputs=[character_description_input],
+            )
+            # Generate text button click handler chain:
+            # 1. Disable the "Generate text" button
+            # 2. Generate text
+            # 3. Enable the "Generate text" button
+            generate_text_button.click(
+                fn=lambda: gr.update(interactive=False),
+                inputs=[],
+                outputs=[generate_text_button],
+            ).then(
+                fn=self.generate_text,
+                inputs=[character_description_input],
+                outputs=[text_input, generated_text_state],
+            ).then(
+                fn=lambda: gr.update(interactive=True),
+                inputs=[],
+                outputs=[generate_text_button],
+            )
+            # Synthesize speech button click event handler chain:
+            # 1. Disable the "Synthesize speech" button
+            # 2. Reset UI state
+            # 3. Synthesize speech, load audio players, and update the related state
+            # 4. Enable the "Synthesize speech" button and the vote buttons
+            synthesize_speech_button.click(
+                fn=lambda: (
+                    gr.update(interactive=False),
+                    gr.update(interactive=False),
+                    gr.update(interactive=False),
+                ),
+                inputs=[],
+                outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
+            ).then(
+                fn=self.reset_ui,
+                inputs=[],
+                outputs=[
+                    option_a_audio_player,
+                    option_b_audio_player,
+                    vote_button_a,
+                    vote_button_b,
+                    option_map_state,
+                    vote_submitted_state,
+                ],
+            ).then(
+                fn=self.synthesize_speech,
+                inputs=[character_description_input, text_input, generated_text_state],
+                outputs=[
+                    option_a_audio_player,
+                    option_b_audio_player,
+                    option_map_state,
+                    text_modified_state,
+                    text_state,
+                    character_description_state,
+                ],
+            ).then(
+                fn=lambda: (
+                    gr.update(interactive=True),
+                    gr.update(interactive=True),
+                    gr.update(interactive=True),
+                ),
+                inputs=[],
+                outputs=[synthesize_speech_button, vote_button_a, vote_button_b],
+            )
+            # Vote button click event handlers
+            vote_button_a.click(
+                fn=lambda: (
+                    gr.update(interactive=False),
+                    gr.update(interactive=False),
+                ),
+                inputs=[],
+                outputs=[vote_button_a, vote_button_b],
+            ).then(
+                fn=self.vote,
+                inputs=[
+                    vote_submitted_state,
+                    option_map_state,
+                    vote_button_a,
+                    text_modified_state,
+                    character_description_state,
+                    text_state,
+                ],
+                outputs=[
+                    vote_submitted_state,
+                    vote_button_a,
+                    vote_button_b,
+                    synthesize_speech_button,
+                ],
+            )
+            vote_button_b.click(
+                fn=lambda: (
+                    gr.update(interactive=False),
+                    gr.update(interactive=False),
+                ),
+                inputs=[],
+                outputs=[vote_button_a, vote_button_b],
+            ).then(
+                fn=self.vote,
+                inputs=[
+                    vote_submitted_state,
+                    option_map_state,
+                    vote_button_b,
+                    text_modified_state,
+                    character_description_state,
+                    text_state,
+                ],
+                outputs=[
+                    vote_submitted_state,
+                    vote_button_a,
+                    vote_button_b,
+                    synthesize_speech_button,
+                ],
+            )
+            # Reload audio player B with audio and set autoplay to True (workaround to play audio back-to-back)
+            option_a_audio_player.stop(
+                fn=lambda option_map: gr.update(
+                    value=f"{option_map['option_b']['audio_file_path']}?t={int(time.time())}",
+                    autoplay=True,
+                ),
+                inputs=[option_map_state],
+                outputs=[option_b_audio_player],
+            )
+            # Turn autoplay back off on audio player B once its playback stops
+            option_b_audio_player.stop(
+                fn=lambda _: gr.update(autoplay=False),
+                inputs=[],
+                outputs=[option_b_audio_player],
+            )
+        logger.debug("Gradio interface built successfully")
+        return demo

src/config.py CHANGED Viewed

@@ -13,71 +13,78 @@ Key Features:
 # Standard Library Imports
 import logging
 import os
 from pathlib import Path
 # Third-Party Library Imports
 from dotenv import load_dotenv
-# Determine the environment (defaults to "dev" if not explicitly set)
-APP_ENV = os.getenv("APP_ENV", "dev").lower()
-if APP_ENV not in {"dev", "prod"}:
-    APP_ENV = "dev"
-# In development, load environment variables from .env file (not used in production)
-if APP_ENV == "dev" and Path(".env").exists():
-    load_dotenv(".env", override=True)
-# Enable debug mode if in development (or if explicitly set in env variables)
-DEBUG = APP_ENV == "dev" or os.getenv("DEBUG", "false").lower() == "true"
-# Configure the logger
-logging.basicConfig(
-    level=logging.DEBUG if DEBUG else logging.INFO,
-    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-)
 logger: logging.Logger = logging.getLogger("tts_arena")
-logger.info(f'App running in "{APP_ENV}" mode.')
-logger.info(f'Debug mode is {"enabled" if DEBUG else "disabled"}.')
-if DEBUG:
-    logger.debug("DEBUG mode enabled.")
-# Define the directory for audio files relative to the project root
-AUDIO_DIR = Path.cwd() / "static" / "audio"
-AUDIO_DIR.mkdir(parents=True, exist_ok=True)
-logger.info(f"Audio directory set to {AUDIO_DIR}")
-def validate_env_var(var_name: str) -> str:
-    """
-    Validates that an environment variable is set and returns its value.
-    Args:
-        var_name (str): The name of the environment variable to validate.
-    Returns:
-        str: The value of the environment variable.
-    Raises:
-        ValueError: If the environment variable is not set.
-    Examples:
-        >>> import os
-        >>> os.environ["EXAMPLE_VAR"] = "example_value"
-        >>> validate_env_var("EXAMPLE_VAR")
-        'example_value'
-        >>> validate_env_var("MISSING_VAR")
-        Traceback (most recent call last):
-          ...
-        ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
-    """
-    value = os.environ.get(var_name, "")
-    if not value:
-        raise ValueError(
-            f"{var_name} is not set. Please ensure it is defined in your environment variables."
         )
-    return value

 # Standard Library Imports
 import logging
 import os
+from dataclasses import dataclass
 from pathlib import Path
+from typing import TYPE_CHECKING, ClassVar, Optional
 # Third-Party Library Imports
 from dotenv import load_dotenv
+if TYPE_CHECKING:
+    from src.integrations import AnthropicConfig, ElevenLabsConfig, HumeConfig
 logger: logging.Logger = logging.getLogger("tts_arena")
+@dataclass(frozen=True)
+class Config:
+    _config: ClassVar[Optional["Config"]] = None
+    app_env: str
+    debug: bool
+    database_url: Optional[str]
+    audio_dir: Path
+    anthropic_config: "AnthropicConfig"
+    hume_config: "HumeConfig"
+    elevenlabs_config: "ElevenLabsConfig"
+    @classmethod
+    def get(cls) -> "Config":
+        if cls._config is None:
+            _config = Config._init()
+            cls._config = _config
+            return _config
+        return cls._config
+    @staticmethod
+    def _init():
+        app_env = os.getenv("APP_ENV", "dev").lower()
+        if app_env not in {"dev", "prod"}:
+            app_env = "dev"
+        # In development, load environment variables from .env file (not used in production)
+        if app_env == "dev" and Path(".env").exists():
+            load_dotenv(".env", override=True)
+        # Enable debug mode if in development (or if explicitly set in env variables)
+        debug = app_env == "dev" or os.getenv("DEBUG", "false").lower() == "true"
+        database_url = os.getenv("DATABASE_URL")
+        # Configure the logger
+        logging.basicConfig(
+            level=logging.DEBUG if debug else logging.INFO,
+            format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+        )
+        logger.info(f'App running in "{app_env}" mode.')
+        logger.info(f"Debug mode is {'enabled' if debug else 'disabled'}.")
+        # Define the directory for audio files relative to the project root
+        audio_dir = Path.cwd() / "static" / "audio"
+        audio_dir.mkdir(parents=True, exist_ok=True)
+        logger.info(f"Audio directory set to {audio_dir}")
+        if debug:
+            logger.debug("DEBUG mode enabled.")
+        from src.integrations import AnthropicConfig, ElevenLabsConfig, HumeConfig
+        return Config(
+            app_env=app_env,
+            debug=debug,
+            database_url=database_url,
+            audio_dir=audio_dir,
+            anthropic_config=AnthropicConfig(),
+            hume_config=HumeConfig(),
+            elevenlabs_config=ElevenLabsConfig(),
         )

src/database/__init__.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from .crud import create_vote
-from .database import Base, SessionLocal, engine
 __all__ = [
     "Base",
-    "SessionLocal",
     "create_vote",
-    "engine"
 ]

 from .crud import create_vote
+from .database import Base, DBSessionMaker, engine, init_db
 __all__ = [
     "Base",
+    "DBSessionMaker",
     "create_vote",
+    "engine",
+    "init_db",
 ]

src/database/database.py CHANGED Viewed

@@ -9,65 +9,71 @@ If no DATABASE_URL environment variable is set, then create a dummy database to
 """
 # Standard Library Imports
-import os
 # Third-Party Library Imports
-from sqlalchemy import create_engine
 from sqlalchemy.orm import declarative_base, sessionmaker
 # Local Application Imports
-from src.config import APP_ENV
-DATABASE_URL = os.getenv("DATABASE_URL")
-if APP_ENV == "prod":
-    # In production, a valid DATABASE_URL is required.
-    if not DATABASE_URL:
-        raise ValueError("DATABASE_URL must be set in production!")
-    engine = create_engine(DATABASE_URL)
-    SessionLocal = sessionmaker(bind=engine)
-# In development, if a DATABASE_URL is provided, use it.
-elif DATABASE_URL:
-    engine = create_engine(DATABASE_URL)
-    SessionLocal = sessionmaker(bind=engine)
-else:
-    # No DATABASE_URL is provided; use a DummySession that does nothing.
-    engine = None
-    class DummySession:
-        is_dummy = True  # Flag to indicate this is a dummy session.
-        def __enter__(self):
-            return self
-        def __exit__(self, exc_type, exc_value, traceback):
-            pass
-        def add(self, _instance, _warn=True):
-            # No-op: simply ignore adding the instance.
-            pass
-        def commit(self):
-            # Raise an exception to simulate failure when attempting a write.
-            raise RuntimeError("DummySession does not support commit operations.")
-        def refresh(self, _instance):
-            # Raise an exception to simulate failure when attempting to refresh.
-            raise RuntimeError("DummySession does not support refresh operations.")
-        def rollback(self):
-            # No-op: there's nothing to roll back.
-            pass
-        def close(self):
-            # No-op: nothing to close.
-            pass
-    def dummy_session_factory():
-        return DummySession()
-    SessionLocal = dummy_session_factory
-# Declarative base class for ORM models.
 Base = declarative_base()

 """
 # Standard Library Imports
+from typing import Callable, Optional
 # Third-Party Library Imports
+from sqlalchemy import Engine, create_engine
 from sqlalchemy.orm import declarative_base, sessionmaker
 # Local Application Imports
+from src.config import Config
+class DummySession:
+    is_dummy = True  # Flag to indicate this is a dummy session.
+    def __enter__(self):
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        pass
+    def add(self, _instance, _warn=True):
+        # No-op: simply ignore adding the instance.
+        pass
+    def commit(self):
+        # Raise an exception to simulate failure when attempting a write.
+        raise RuntimeError("DummySession does not support commit operations.")
+    def refresh(self, _instance):
+        # Raise an exception to simulate failure when attempting to refresh.
+        raise RuntimeError("DummySession does not support refresh operations.")
+    def rollback(self):
+        # No-op: there's nothing to roll back.
+        pass
+    def close(self):
+        # No-op: nothing to close.
+        pass
 Base = declarative_base()
+engine: Optional[Engine] = None
+DBSessionMaker = sessionmaker | Callable[[], DummySession]
+def init_db(config: Config) -> DBSessionMaker:
+    # ruff doesn't like setting global variables, but this is practical here
+    global engine # noqa
+    if config.app_env == "prod":
+        # In production, a valid DATABASE_URL is required.
+        if not config.database_url:
+            raise ValueError("DATABASE_URL must be set in production!")
+        engine = create_engine(config.database_url)
+        return sessionmaker(bind=engine)
+    # In development, if a DATABASE_URL is provided, use it.
+    if config.database_url:
+        engine = create_engine(config.database_url)
+        return sessionmaker(bind=engine)
+    # No DATABASE_URL is provided; use a DummySession that does nothing.
+    engine = None
+    def dummy_session_factory():
+        return DummySession()
+    return dummy_session_factory

src/database/models.py CHANGED Viewed

@@ -34,6 +34,7 @@ class OptionEnum(str, Enum):
     OPTION_A = "option_a"
     OPTION_B = "option_b"
 class VoteResult(Base):
     __tablename__ = "vote_results"

     OPTION_A = "option_a"
     OPTION_B = "option_b"
 class VoteResult(Base):
     __tablename__ = "vote_results"

src/integrations/__init__.py CHANGED Viewed

@@ -1,12 +1,15 @@
-from .anthropic_api import AnthropicError, generate_text_with_claude
-from .elevenlabs_api import ElevenLabsError, text_to_speech_with_elevenlabs
-from .hume_api import HumeError, text_to_speech_with_hume
 __all__ = [
     "AnthropicError",
     "ElevenLabsError",
     "HumeError",
     "generate_text_with_claude",
     "text_to_speech_with_elevenlabs",
-    "text_to_speech_with_hume"
 ]

+from .anthropic_api import AnthropicConfig, AnthropicError, generate_text_with_claude
+from .elevenlabs_api import ElevenLabsConfig, ElevenLabsError, text_to_speech_with_elevenlabs
+from .hume_api import HumeConfig, HumeError, text_to_speech_with_hume
 __all__ = [
+    "AnthropicConfig",
     "AnthropicError",
+    "ElevenLabsConfig",
     "ElevenLabsError",
+    "HumeConfig",
     "HumeError",
     "generate_text_with_claude",
     "text_to_speech_with_elevenlabs",
+    "text_to_speech_with_hume",
 ]

src/integrations/anthropic_api.py CHANGED Viewed

@@ -29,9 +29,9 @@ from anthropic.types import Message, ModelParam, TextBlock
 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
-from src.config import logger, validate_env_var
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
-from src.utils import truncate_text
 @dataclass(frozen=True)
@@ -140,10 +140,6 @@ class UnretryableAnthropicError(AnthropicError):
         super().__init__(message, original_exception)
-# Initialize the Anthropic client
-anthropic_config = AnthropicConfig()
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
@@ -151,7 +147,7 @@ anthropic_config = AnthropicConfig()
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
-def generate_text_with_claude(character_description: str) -> str:
     """
     Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
@@ -165,10 +161,9 @@ def generate_text_with_claude(character_description: str) -> str:
         AnthropicError: If there is an error communicating with the Anthropic API.
     """
     # Build prompt for claude with character description
     prompt = anthropic_config.build_expressive_prompt(character_description)
-    logger.debug(
-        f"Generating text with Claude. Character description length: {len(prompt)} characters."
-    )
     response = None
     try:
@@ -189,27 +184,20 @@ def generate_text_with_claude(character_description: str) -> str:
         # Process response
         blocks: Union[List[TextBlock], TextBlock, None] = response.content
         if isinstance(blocks, list):
-            result = "\n\n".join(
-                block.text for block in blocks if isinstance(block, TextBlock)
-            )
             logger.debug(f"Processed response from list: {truncate_text(result)}")
             return result
         if isinstance(blocks, TextBlock):
-            logger.debug(
-                f"Processed response from single TextBlock: {truncate_text(blocks.text)}"
-            )
             return blocks.text
         logger.warning(f"Unexpected response type: {type(blocks)}")
         return str(blocks or "No content generated.")
     except Exception as e:
-        if (
-            isinstance(e, APIError)
-            and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE
-        ):
             raise UnretryableAnthropicError(
-                message=f"\"{e.body['error']['message']}\"",
                 original_exception=e,
             ) from e

 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
+from src.config import Config, logger
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
+from src.utils import truncate_text, validate_env_var
 @dataclass(frozen=True)
         super().__init__(message, original_exception)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
+def generate_text_with_claude(character_description: str, config: Config) -> str:
     """
     Generates text using Claude (Anthropic LLM) via the Anthropic SDK.
         AnthropicError: If there is an error communicating with the Anthropic API.
     """
     # Build prompt for claude with character description
+    anthropic_config = config.anthropic_config
     prompt = anthropic_config.build_expressive_prompt(character_description)
+    logger.debug(f"Generating text with Claude. Character description length: {len(prompt)} characters.")
     response = None
     try:
         # Process response
         blocks: Union[List[TextBlock], TextBlock, None] = response.content
         if isinstance(blocks, list):
+            result = "\n\n".join(block.text for block in blocks if isinstance(block, TextBlock))
             logger.debug(f"Processed response from list: {truncate_text(result)}")
             return result
         if isinstance(blocks, TextBlock):
+            logger.debug(f"Processed response from single TextBlock: {truncate_text(blocks.text)}")
             return blocks.text
         logger.warning(f"Unexpected response type: {type(blocks)}")
         return str(blocks or "No content generated.")
     except Exception as e:
+        if isinstance(e, APIError) and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE:
             raise UnretryableAnthropicError(
+                message=f'"{e.body["error"]["message"]}"',
                 original_exception=e,
             ) from e

src/integrations/elevenlabs_api.py CHANGED Viewed

@@ -31,9 +31,9 @@ from elevenlabs.core import ApiError
 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
-from src.config import logger, validate_env_var
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
-from src.utils import save_base64_audio_to_file
 @dataclass(frozen=True)
@@ -76,10 +76,6 @@ class UnretryableElevenLabsError(ElevenLabsError):
         super().__init__(message, original_exception)
-# Initialize the ElevenLabs client
-elevenlabs_config = ElevenLabsConfig()
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
@@ -87,9 +83,7 @@ elevenlabs_config = ElevenLabsConfig()
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
-def text_to_speech_with_elevenlabs(
-    character_description: str, text: str
-) -> Tuple[None, str]:
     """
     Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
@@ -106,9 +100,9 @@ def text_to_speech_with_elevenlabs(
     Raises:
         ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
     """
-    logger.debug(
-        f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters."
-    )
     try:
         # Synthesize speech using the ElevenLabs SDK
@@ -129,16 +123,13 @@ def text_to_speech_with_elevenlabs(
         generated_voice_id = preview.generated_voice_id
         base64_audio = preview.audio_base_64
         filename = f"{generated_voice_id}.mp3"
-        audio_file_path = save_base64_audio_to_file(base64_audio, filename)
         # Write audio to file and return the relative path
         return None, audio_file_path
     except Exception as e:
-        if (
-            isinstance(e, ApiError)
-            and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE
-        ):
             raise UnretryableElevenLabsError(
                 message=f"{e.body['detail']['message']}",
                 original_exception=e,

 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
+from src.config import Config, logger
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
+from src.utils import save_base64_audio_to_file, validate_env_var
 @dataclass(frozen=True)
         super().__init__(message, original_exception)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
+def text_to_speech_with_elevenlabs(character_description: str, text: str, config: Config) -> Tuple[None, str]:
     """
     Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
     Raises:
         ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
     """
+    logger.debug(f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters.")
+    elevenlabs_config = config.elevenlabs_config
     try:
         # Synthesize speech using the ElevenLabs SDK
         generated_voice_id = preview.generated_voice_id
         base64_audio = preview.audio_base_64
         filename = f"{generated_voice_id}.mp3"
+        audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
         # Write audio to file and return the relative path
         return None, audio_file_path
     except Exception as e:
+        if isinstance(e, ApiError) and e.status_code >= CLIENT_ERROR_CODE and e.status_code < SERVER_ERROR_CODE:
             raise UnretryableElevenLabsError(
                 message=f"{e.body['detail']['message']}",
                 original_exception=e,

src/integrations/hume_api.py CHANGED Viewed

@@ -29,9 +29,9 @@ from requests.exceptions import HTTPError
 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
-from src.config import logger, validate_env_var
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
-from src.utils import save_base64_audio_to_file
 HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
 """ Support audio file formats for the Hume TTS API"""
@@ -85,7 +85,6 @@ class UnretryableHumeError(HumeError):
 # Initialize the Hume client
-hume_config = HumeConfig()
 @retry(
@@ -96,7 +95,7 @@ hume_config = HumeConfig()
     reraise=True,
 )
 def text_to_speech_with_hume(
-    character_description: str, text: str, num_generations: int = 1
 ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
     """
     Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
@@ -135,6 +134,7 @@ def text_to_speech_with_hume(
     if num_generations < 1 or num_generations > 2:
         raise ValueError("Invalid number of generations specified. Must be 1 or 2.")
     request_body = {
         "utterances": [{"text": text, "description": character_description or None}],
         "format": {
@@ -161,20 +161,17 @@ def text_to_speech_with_hume(
         # Extract the base64 encoded audio and generation ID from the generation
         generation_a = generations[0]
-        generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a)
         if num_generations == 1:
             return (generation_a_id, audio_a_path)
         generation_b = generations[1]
-        generation_b_id, audio_b_path = parse_hume_tts_generation(generation_b)
         return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
     except Exception as e:
-        if (
-            isinstance(e, HTTPError)
-            and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE
-        ):
             raise UnretryableHumeError(
                 message=f"{e.response.text}",
                 original_exception=e,
@@ -186,7 +183,7 @@ def text_to_speech_with_hume(
         ) from e
-def parse_hume_tts_generation(generation: Dict[str, Any]) -> Tuple[str, str]:
     """
     Parse a Hume TTS generation response and save the decoded audio as an MP3 file.
@@ -220,5 +217,5 @@ def parse_hume_tts_generation(generation: Dict[str, Any]) -> Tuple[str, str]:
         raise KeyError("The generation dictionary is missing the 'audio' key.")
     filename = f"{generation_id}.mp3"
-    audio_file_path = save_base64_audio_to_file(base64_audio, filename)
     return generation_id, audio_file_path

 from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
+from src.config import Config, logger
 from src.constants import CLIENT_ERROR_CODE, SERVER_ERROR_CODE
+from src.utils import save_base64_audio_to_file, validate_env_var
 HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
 """ Supported audio file formats for the Hume TTS API"""
 # Initialize the Hume client
 @retry(
     reraise=True,
 )
 def text_to_speech_with_hume(
+    character_description: str, text: str, num_generations: int, config: Config
 ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
     """
     Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
     if num_generations < 1 or num_generations > 2:
         raise ValueError("Invalid number of generations specified. Must be 1 or 2.")
+    hume_config = config.hume_config
     request_body = {
         "utterances": [{"text": text, "description": character_description or None}],
         "format": {
         # Extract the base64 encoded audio and generation ID from the generation
         generation_a = generations[0]
+        generation_a_id, audio_a_path = parse_hume_tts_generation(generation_a, config)
         if num_generations == 1:
             return (generation_a_id, audio_a_path)
         generation_b = generations[1]
+        generation_b_id, audio_b_path = parse_hume_tts_generation(generation_b, config)
         return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
     except Exception as e:
+        if isinstance(e, HTTPError) and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE:
             raise UnretryableHumeError(
                 message=f"{e.response.text}",
                 original_exception=e,
         ) from e
+def parse_hume_tts_generation(generation: Dict[str, Any], config: Config) -> Tuple[str, str]:
     """
     Parse a Hume TTS generation response and save the decoded audio as an MP3 file.
         raise KeyError("The generation dictionary is missing the 'audio' key.")
     filename = f"{generation_id}.mp3"
+    audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
     return generation_id, audio_file_path

src/main.py ADDED Viewed

	@@ -0,0 +1,18 @@

+"""
+main.py
+This module is the entry point for the app. It loads configuration and starts the Gradio app.
+"""
+from src.app import App
+from src.config import Config, logger
+from src.database.database import init_db
+if __name__ == "__main__":
+    config = Config.get()
+    logger.info("Launching TTS Arena Gradio app...")
+    db_session_maker = init_db(config)
+    app = App(config, db_session_maker)
+    demo = app.build_gradio_interface()
+    init_db(config)
+    demo.launch(server_name="0.0.0.0", allowed_paths=[str(config.audio_dir)])

src/utils.py CHANGED Viewed

@@ -8,14 +8,17 @@ These functions provide reusable logic to simplify code in other modules.
 # Standard Library Imports
 import base64
 import json
 import random
 import time
 from pathlib import Path
-from typing import Tuple
 # Local Application Imports
 from src import constants
-from src.config import APP_ENV, AUDIO_DIR, logger
 from src.custom_types import (
     ComparisonType,
     Option,
@@ -24,7 +27,8 @@ from src.custom_types import (
     TTSProviderName,
     VotingResults,
 )
-from src.database import SessionLocal, crud
 def truncate_text(text: str, max_length: int = 50) -> str:
@@ -77,9 +81,7 @@ def validate_character_description_length(character_description: str) -> None:
     stripped_character_description = character_description.strip()
     character_description_length = len(stripped_character_description)
-    logger.debug(
-        f"Voice description length being validated: {character_description_length} characters"
-    )
     if character_description_length < constants.CHARACTER_DESCRIPTION_MIN_LENGTH:
         raise ValueError(
@@ -95,9 +97,7 @@ def validate_character_description_length(character_description: str) -> None:
         )
     truncated_description = truncate_text(stripped_character_description)
-    logger.debug(
-        f"Character description length validation passed for character_description: {truncated_description}"
-    )
 def delete_files_older_than(directory: str, minutes: int = 30) -> None:
@@ -133,7 +133,7 @@ def delete_files_older_than(directory: str, minutes: int = 30) -> None:
                     logger.exception(f"Error deleting {file_path}: {e}")
-def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
     """
     Decode a base64-encoded audio string and write the resulting binary data to a file
     within the preconfigured AUDIO_DIR directory. Prior to writing the bytes to an audio
@@ -158,11 +158,11 @@ def save_base64_audio_to_file(base64_audio: str, filename: str) -> str:
     audio_bytes = base64.b64decode(base64_audio)
     # Construct the full absolute file path within the AUDIO_DIR directory using Path.
-    file_path = Path(AUDIO_DIR) / filename
     # Delete all audio files older than 30 minutes before writing the new audio file.
     num_minutes = 30
-    delete_files_older_than(AUDIO_DIR, num_minutes)
     # Write the binary audio data to the file.
     with file_path.open("wb") as audio_file:
@@ -204,11 +204,7 @@ def choose_providers(
     hume_comparison_only = text_modified or not character_description
     provider_a = constants.HUME_AI
-    provider_b = (
-        constants.HUME_AI
-        if hume_comparison_only
-        else random.choice(constants.TTS_PROVIDERS)
-    )
     return provider_a, provider_b
@@ -277,10 +273,8 @@ def determine_selected_option(
     return selected_option, other_option
-def determine_comparison_type(
-    provider_a: TTSProviderName,
-    provider_b: TTSProviderName
-) -> ComparisonType:
     """
     Determine the comparison type based on the given TTS provider names.
@@ -311,7 +305,7 @@ def log_voting_results(voting_results: VotingResults) -> None:
     logger.info("Voting results:\n%s", json.dumps(voting_results, indent=4))
-def handle_vote_failure(e: Exception, voting_results: VotingResults, is_dummy_db_session: bool) -> None:
     """
     Handles logging when creating a vote record fails.
@@ -322,22 +316,42 @@ def handle_vote_failure(e: Exception, voting_results: VotingResults, is_dummy_db
     In development with a dummy session:
       - Only logs the voting results.
     """
-    if APP_ENV == "prod" or (APP_ENV == "dev" and not is_dummy_db_session):
-        logger.error("Failed to create vote record: %s", e, exc_info=(APP_ENV == "prod"))
         log_voting_results(voting_results)
-        if APP_ENV == "prod":
             raise e
     else:
         # Dev mode with a dummy session: only log the voting results.
         log_voting_results(voting_results)
 def submit_voting_results(
     option_map: OptionMap,
     selected_option: str,
     text_modified: bool,
     character_description: str,
     text: str,
 ) -> None:
     """
     Constructs the voting results dictionary from the provided inputs,
@@ -367,17 +381,34 @@ def submit_voting_results(
         "is_custom_text": text_modified,
     }
-    db = SessionLocal()
-    is_dummy_db_session = getattr(db, "is_dummy", False)
-    try:
-        crud.create_vote(db, voting_results)
-    except Exception as e:
-        handle_vote_failure(e, voting_results, is_dummy_db_session)
-    else:
-        logger.info("Vote record created successfully.")
-        if APP_ENV == "dev":
-            log_voting_results(voting_results)
-    finally:
-        db.close()

 # Standard Library Imports
 import base64
 import json
+import os
 import random
 import time
 from pathlib import Path
+from typing import Tuple, cast
+from sqlalchemy.orm import Session
 # Local Application Imports
 from src import constants
+from src.config import Config, logger
 from src.custom_types import (
     ComparisonType,
     Option,
     TTSProviderName,
     VotingResults,
 )
+from src.database import crud
+from src.database.database import DBSessionMaker
 def truncate_text(text: str, max_length: int = 50) -> str:
     stripped_character_description = character_description.strip()
     character_description_length = len(stripped_character_description)
+    logger.debug(f"Voice description length being validated: {character_description_length} characters")
     if character_description_length < constants.CHARACTER_DESCRIPTION_MIN_LENGTH:
         raise ValueError(
         )
     truncated_description = truncate_text(stripped_character_description)
+    logger.debug(f"Character description length validation passed for character_description: {truncated_description}")
 def delete_files_older_than(directory: str, minutes: int = 30) -> None:
                     logger.exception(f"Error deleting {file_path}: {e}")
+def save_base64_audio_to_file(base64_audio: str, filename: str, config: Config) -> str:
     """
     Decode a base64-encoded audio string and write the resulting binary data to a file
     within the preconfigured AUDIO_DIR directory. Prior to writing the bytes to an audio
     audio_bytes = base64.b64decode(base64_audio)
     # Construct the full absolute file path within the AUDIO_DIR directory using Path.
+    file_path = Path(config.audio_dir) / filename
     # Delete all audio files older than 30 minutes before writing the new audio file.
     num_minutes = 30
+    delete_files_older_than(config.audio_dir, num_minutes)
     # Write the binary audio data to the file.
     with file_path.open("wb") as audio_file:
     hume_comparison_only = text_modified or not character_description
     provider_a = constants.HUME_AI
+    provider_b = constants.HUME_AI if hume_comparison_only else random.choice(constants.TTS_PROVIDERS)
     return provider_a, provider_b
     return selected_option, other_option
+def determine_comparison_type(provider_a: TTSProviderName, provider_b: TTSProviderName) -> ComparisonType:
     """
     Determine the comparison type based on the given TTS provider names.
     logger.info("Voting results:\n%s", json.dumps(voting_results, indent=4))
+def handle_vote_failure(e: Exception, voting_results: VotingResults, is_dummy_db_session: bool, config: Config) -> None:
     """
     Handles logging when creating a vote record fails.
     In development with a dummy session:
       - Only logs the voting results.
     """
+    if config.app_env == "prod" or (config.app_env == "dev" and not is_dummy_db_session):
+        logger.error("Failed to create vote record: %s", e, exc_info=(config.app_env == "prod"))
         log_voting_results(voting_results)
+        if config.app_env == "prod":
             raise e
     else:
         # Dev mode with a dummy session: only log the voting results.
         log_voting_results(voting_results)
+def _persist_vote(db_session_maker: DBSessionMaker, voting_results: VotingResults, config: Config) -> None:
+    db = db_session_maker()
+    is_dummy_db_session = getattr(db, "is_dummy", False)
+    if is_dummy_db_session:
+        logger.info("Vote record created successfully.")
+        log_voting_results(voting_results)
+    try:
+        crud.create_vote(cast(Session, db), voting_results)
+    except Exception as e:
+        handle_vote_failure(e, voting_results, is_dummy_db_session, config)
+    else:
+        logger.info("Vote record created successfully.")
+        if config.app_env == "dev":
+            log_voting_results(voting_results)
+    finally:
+        db.close()
 def submit_voting_results(
     option_map: OptionMap,
     selected_option: str,
     text_modified: bool,
     character_description: str,
     text: str,
+    db_session_maker: DBSessionMaker,
+    config: Config,
 ) -> None:
     """
     Constructs the voting results dictionary from the provided inputs,
         "is_custom_text": text_modified,
     }
+    _persist_vote(db_session_maker, voting_results, config)
+def validate_env_var(var_name: str) -> str:
+    """
+    Validates that an environment variable is set and returns its value.
+    Args:
+        var_name (str): The name of the environment variable to validate.
+    Returns:
+        str: The value of the environment variable.
+    Raises:
+        ValueError: If the environment variable is not set.
+    Examples:
+        >>> import os
+        >>> os.environ["EXAMPLE_VAR"] = "example_value"
+        >>> validate_env_var("EXAMPLE_VAR")
+        'example_value'
+        >>> validate_env_var("MISSING_VAR")
+        Traceback (most recent call last):
+          ...
+        ValueError: MISSING_VAR is not set. Please ensure it is defined in your environment variables.
+    """
+    value = os.environ.get(var_name, "")
+    if not value:
+        raise ValueError(f"{var_name} is not set. Please ensure it is defined in your environment variables.")
+    return value

uv.lock CHANGED Viewed

@@ -167,7 +167,7 @@ name = "click"
 version = "8.1.8"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
 wheels = [
@@ -237,6 +237,7 @@ dependencies = [
 dev = [
     { name = "mypy" },
     { name = "pre-commit" },
     { name = "pytest" },
     { name = "ruff" },
     { name = "watchfiles" },
@@ -258,6 +259,7 @@ requires-dist = [
 dev = [
     { name = "mypy", specifier = ">=1.15.0" },
     { name = "pre-commit", specifier = ">=4.1.0" },
     { name = "pytest", specifier = ">=8.3.4" },
     { name = "ruff", specifier = ">=0.9.5" },
     { name = "watchfiles", specifier = ">=1.0.4" },
@@ -966,6 +968,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
 ]
 [[package]]
 name = "pytest"
 version = "8.3.4"
@@ -1228,7 +1243,7 @@ name = "tqdm"
 version = "4.67.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
 wheels = [

 version = "8.1.8"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
+    { name = "colorama", marker = "platform_system == 'Windows'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593 }
 wheels = [
 dev = [
     { name = "mypy" },
     { name = "pre-commit" },
+    { name = "pyright" },
     { name = "pytest" },
     { name = "ruff" },
     { name = "watchfiles" },
 dev = [
     { name = "mypy", specifier = ">=1.15.0" },
     { name = "pre-commit", specifier = ">=4.1.0" },
+    { name = "pyright", specifier = ">=1.1.394" },
     { name = "pytest", specifier = ">=8.3.4" },
     { name = "ruff", specifier = ">=0.9.5" },
     { name = "watchfiles", specifier = ">=1.0.4" },
     { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
 ]
+[[package]]
+name = "pyright"
+version = "1.1.394"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "nodeenv" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b1/e4/79f4d8a342eed6790fdebdb500e95062f319ee3d7d75ae27304ff995ae8c/pyright-1.1.394.tar.gz", hash = "sha256:56f2a3ab88c5214a451eb71d8f2792b7700434f841ea219119ade7f42ca93608", size = 3809348 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d6/4c/50c74e3d589517a9712a61a26143b587dba6285434a17aebf2ce6b82d2c3/pyright-1.1.394-py3-none-any.whl", hash = "sha256:5f74cce0a795a295fb768759bbeeec62561215dea657edcaab48a932b031ddbb", size = 5679540 },
+]
 [[package]]
 name = "pytest"
 version = "8.3.4"
 version = "4.67.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
+    { name = "colorama", marker = "platform_system == 'Windows'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
 wheels = [