Spaces:

HumeAI
/

expressive-tts-arena

Running

App Files Files Community

zach committed on Feb 25

Commit

104737f

1 Parent(s): 80026d8

Update API integration code to be async

Browse files

Files changed (9) hide show

pyproject.toml +1 -1
src/app.py +31 -78
src/constants.py +1 -1
src/database/database.py +15 -14
src/integrations/anthropic_api.py +4 -1
src/integrations/elevenlabs_api.py +18 -24
src/integrations/hume_api.py +58 -73
src/main.py +12 -2
uv.lock +0 -2

pyproject.toml CHANGED Viewed

@@ -12,7 +12,6 @@ dependencies = [
     "greenlet>=2.0.0",
     "httpx>=0.24.1",
     "python-dotenv>=1.0.1",
-    "requests>=2.32.3",
     "sqlalchemy>=2.0.0",
     "tenacity>=9.0.0",
 ]
@@ -45,6 +44,7 @@ ignore = [
     "PLR0912",
     "PLR0913",
     "PLR2004",
     "TD002",
     "TD003",
 ]

     "greenlet>=2.0.0",
     "httpx>=0.24.1",
     "python-dotenv>=1.0.1",
     "sqlalchemy>=2.0.0",
     "tenacity>=9.0.0",
 ]
     "PLR0912",
     "PLR0913",
     "PLR2004",
+    "RUF006",
     "TD002",
     "TD003",
 ]

src/app.py CHANGED Viewed

@@ -10,9 +10,7 @@ Users can compare the outputs and vote for their favorite in an interactive UI.
 # Standard Library Imports
 import asyncio
-import threading
 import time
-from concurrent.futures import ThreadPoolExecutor
 from typing import Tuple
 # Third-Party Library Imports
@@ -83,7 +81,7 @@ class App:
             logger.error(f"Unexpected error while generating text: {e}")
             raise gr.Error("Failed to generate text. Please try again later.")
-    def _synthesize_speech(
         self,
         character_description: str,
         text: str,
@@ -130,38 +128,34 @@ class App:
             if provider_b == constants.HUME_AI:
                 num_generations = 2
                 # If generating 2 Hume outputs, do so in a single API call.
-                result = text_to_speech_with_hume(character_description, text, num_generations, self.config)
                 # Enforce that 4 values are returned.
                 if not (isinstance(result, tuple) and len(result) == 4):
                     raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
                 generation_id_a, audio_a, generation_id_b, audio_b = result
             else:
-                with ThreadPoolExecutor(max_workers=2) as executor:
-                    num_generations = 1
-                    # Generate a single Hume output.
-                    future_audio_a = executor.submit(
-                        text_to_speech_with_hume, character_description, text, num_generations, self.config
-                    )
-                    # Generate a second TTS output from the second provider.
-                    match provider_b:
-                        case constants.ELEVENLABS:
-                            future_audio_b = executor.submit(
-                                text_to_speech_with_elevenlabs, character_description, text, self.config
-                            )
-                        case _:
-                            # Additional TTS Providers can be added here.
-                            raise ValueError(f"Unsupported provider: {provider_b}")
-                    result_a = future_audio_a.result()
-                    result_b = future_audio_b.result()
-                    if isinstance(result_a, tuple) and len(result_a) >= 2:
-                        generation_id_a, audio_a = result_a[0], result_a[1]
-                    else:
-                        raise ValueError("Unexpected return from text_to_speech_with_hume")
-                    if isinstance(result_b, tuple) and len(result_b) >= 2:
-                        generation_id_b, audio_b = result_b[0], result_b[1] # type: ignore
-                    else:
-                        raise ValueError("Unexpected return from text_to_speech_with_elevenlabs")
             # Shuffle options so that placement of options in the UI will always be random.
             option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
@@ -190,47 +184,7 @@ class App:
             raise gr.Error("An unexpected error occurred. Please try again later.")
-    def _background_submit_vote(
-        self,
-        option_map: OptionMap,
-        selected_option: constants.OptionKey,
-        text_modified: bool,
-        character_description: str,
-        text: str,
-    ) -> None:
-        """
-        Runs the vote submission in a background thread.
-        Creates a new event loop and runs the async submit_voting_results function in it.
-        Args:
-            Same as submit_voting_results
-        Returns:
-            None
-        """
-        try:
-            # Create a new event loop for this thread
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            # Run the async function in the new loop
-            loop.run_until_complete(submit_voting_results(
-                option_map,
-                selected_option,
-                text_modified,
-                character_description,
-                text,
-                self.db_session_maker,
-                self.config,
-            ))
-        except Exception as e:
-            logger.error(f"Error in background vote submission thread: {e}", exc_info=True)
-        finally:
-            # Close the loop when done
-            loop.close()
-    def _vote(
         self,
         vote_submitted: bool,
         option_map: OptionMap,
@@ -261,19 +215,18 @@ class App:
         selected_provider = option_map[selected_option]["provider"]
         other_provider = option_map[other_option]["provider"]
-        # Start a background thread for the database operation
-        thread = threading.Thread(
-            target=self._background_submit_vote,
-            args=(
                 option_map,
                 selected_option,
                 text_modified,
                 character_description,
                 text,
-            ),
-            daemon=True
         )
-        thread.start()
         # Build button text, displaying the provider and voice name, appending the trophy emoji to the selected option.
         selected_label = f"{selected_provider} {constants.TROPHY_EMOJI}"

 # Standard Library Imports
 import asyncio
 import time
 from typing import Tuple
 # Third-Party Library Imports
             logger.error(f"Unexpected error while generating text: {e}")
             raise gr.Error("Failed to generate text. Please try again later.")
+    async def _synthesize_speech(
         self,
         character_description: str,
         text: str,
             if provider_b == constants.HUME_AI:
                 num_generations = 2
                 # If generating 2 Hume outputs, do so in a single API call.
+                result = await text_to_speech_with_hume(character_description, text, num_generations, self.config)
                 # Enforce that 4 values are returned.
                 if not (isinstance(result, tuple) and len(result) == 4):
                     raise ValueError("Expected 4 values from Hume TTS call when generating 2 outputs")
                 generation_id_a, audio_a, generation_id_b, audio_b = result
             else:
+                num_generations = 1
+                # Run both API calls concurrently using asyncio
+                tasks = []
+                # Generate a single Hume output
+                tasks.append(text_to_speech_with_hume(character_description, text, num_generations, self.config))
+                # Generate a second TTS output from the second provider
+                match provider_b:
+                    case constants.ELEVENLABS:
+                        tasks.append(text_to_speech_with_elevenlabs(character_description, text, self.config))
+                    case _:
+                        # Additional TTS Providers can be added here.
+                        raise ValueError(f"Unsupported provider: {provider_b}")
+                # Await both tasks concurrently
+                result_a, result_b = await asyncio.gather(*tasks)
+                if not isinstance(result_a, tuple) or len(result_a) != 2:
+                    raise ValueError("Expected 2 values from Hume TTS call when generating 1 output")
+                generation_id_a, audio_a = result_a[0], result_a[1]
+                generation_id_b, audio_b = result_b[0], result_b[1]
             # Shuffle options so that placement of options in the UI will always be random.
             option_a = Option(provider=provider_a, audio=audio_a, generation_id=generation_id_a)
             raise gr.Error("An unexpected error occurred. Please try again later.")
+    async def _vote(
         self,
         vote_submitted: bool,
         option_map: OptionMap,
         selected_provider = option_map[selected_option]["provider"]
         other_provider = option_map[other_option]["provider"]
+        # Process vote in the background without blocking the UI
+        asyncio.create_task(
+            submit_voting_results(
                 option_map,
                 selected_option,
                 text_modified,
                 character_description,
                 text,
+                self.db_session_maker,
+                self.config,
+            )
         )
         # Build button text, displaying the provider and voice name, appending the trophy emoji to the selected option.
         selected_label = f"{selected_provider} {constants.TROPHY_EMOJI}"

src/constants.py CHANGED Viewed

@@ -59,7 +59,7 @@ SAMPLE_CHARACTER_DESCRIPTIONS: dict = {
         "building tension through perfectly timed pauses and haunting inflections."
     ),
     "🌿 British Naturalist": (
-        "A passionate nature documentarian with a voice that brings the wild to life—crisp, refined "
         "tones brimming with wonder and expertise. It shifts seamlessly from hushed observation to "
         "animated excitement, painting vivid pictures of the natural world's endless marvels."
     ),

         "building tension through perfectly timed pauses and haunting inflections."
     ),
     "🌿 British Naturalist": (
+        "A passionate, British nature documentarian with a voice that brings the wild to life—crisp, refined "
         "tones brimming with wonder and expertise. It shifts seamlessly from hushed observation to "
         "animated excitement, painting vivid pictures of the natural world's endless marvels."
     ),

src/database/database.py CHANGED Viewed

@@ -9,25 +9,22 @@ If no DATABASE_URL environment variable is set, then create a dummy database to
 """
 # Standard Library Imports
-from typing import Callable, Optional
 # Third-Party Library Imports
 from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker, create_async_engine
 from sqlalchemy.orm import DeclarativeBase
 # Local Application Imports
-from src.config import Config
-# Define the SQLAlchemy Base using SQLAlchemy 2.0 style.
 class Base(DeclarativeBase):
     pass
-engine: Optional[AsyncEngine] = None
-class AsyncDummySession:
     is_dummy = True  # Flag to indicate this is a dummy session.
     async def __enter__(self):
@@ -42,11 +39,11 @@ class AsyncDummySession:
     async def commit(self):
         # Raise an exception to simulate failure when attempting a write.
-        raise RuntimeError("DummySession does not support commit operations.")
     async def refresh(self, _instance):
         # Raise an exception to simulate failure when attempting to refresh.
-        raise RuntimeError("DummySession does not support refresh operations.")
     async def rollback(self):
         # No-op: there's nothing to roll back.
@@ -57,8 +54,8 @@ class AsyncDummySession:
         pass
-# AsyncDBSessionMaker is either a async_sessionmaker instance or a callable that returns a AsyncDummySession.
-AsyncDBSessionMaker = async_sessionmaker | Callable[[], AsyncDummySession]
 def init_db(config: Config) -> AsyncDBSessionMaker:
@@ -88,21 +85,25 @@ def init_db(config: Config) -> AsyncDBSessionMaker:
         # In production, a valid DATABASE_URL is required.
         if not config.database_url:
             raise ValueError("DATABASE_URL must be set in production!")
         async_db_url = convert_to_async_url(config.database_url)
         engine = create_async_engine(async_db_url)
         return async_sessionmaker(bind=engine, expire_on_commit=False, class_=AsyncSession)
     # In development, if a DATABASE_URL is provided, use it.
     if config.database_url:
         async_db_url = convert_to_async_url(config.database_url)
         engine = create_async_engine(async_db_url)
         return async_sessionmaker(bind=engine, expire_on_commit=False, class_=AsyncSession)
-    # No DATABASE_URL is provided; use a DummySession that does nothing.
     engine = None
-    def async_dummy_session_factory() -> AsyncDummySession:
-        return AsyncDummySession()
     return async_dummy_session_factory

 """
 # Standard Library Imports
+from typing import Callable, Optional, TypeAlias, Union
 # Third-Party Library Imports
 from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker, create_async_engine
 from sqlalchemy.orm import DeclarativeBase
 # Local Application Imports
+from src.config import Config, logger
+# Define the SQLAlchemy Base
 class Base(DeclarativeBase):
     pass
+class DummyAsyncSession:
     is_dummy = True  # Flag to indicate this is a dummy session.
     async def __enter__(self):
     async def commit(self):
         # Raise an exception to simulate failure when attempting a write.
+        raise RuntimeError("DummyAsyncSession does not support commit operations.")
     async def refresh(self, _instance):
         # Raise an exception to simulate failure when attempting to refresh.
+        raise RuntimeError("DummyAsyncSession does not support refresh operations.")
     async def rollback(self):
         # No-op: there's nothing to roll back.
         pass
+AsyncDBSessionMaker: TypeAlias = Union[async_sessionmaker[AsyncSession], Callable[[], DummyAsyncSession]]
+engine: Optional[AsyncEngine] = None
 def init_db(config: Config) -> AsyncDBSessionMaker:
         # In production, a valid DATABASE_URL is required.
         if not config.database_url:
             raise ValueError("DATABASE_URL must be set in production!")
         async_db_url = convert_to_async_url(config.database_url)
         engine = create_async_engine(async_db_url)
         return async_sessionmaker(bind=engine, expire_on_commit=False, class_=AsyncSession)
     # In development, if a DATABASE_URL is provided, use it.
     if config.database_url:
         async_db_url = convert_to_async_url(config.database_url)
         engine = create_async_engine(async_db_url)
         return async_sessionmaker(bind=engine, expire_on_commit=False, class_=AsyncSession)
+    # No DATABASE_URL is provided; use a DummyAsyncSession that does nothing.
     engine = None
+    logger.warning("No DATABASE_URL provided - database operations will use DummyAsyncSession")
+    def async_dummy_session_factory() -> DummyAsyncSession:
+        return DummyAsyncSession()
     return async_dummy_session_factory

src/integrations/anthropic_api.py CHANGED Viewed

@@ -84,7 +84,8 @@ class AnthropicConfig:
         from anthropic import AsyncAnthropic  # Import the async client from Anthropic SDK
         return AsyncAnthropic(api_key=self.api_key)
-    def build_expressive_prompt(self, character_description: str) -> str:
         """
         Constructs and returns a prompt based solely on the provided character description.
         The returned prompt is intended to instruct Claude to generate expressive text from a character,
@@ -120,6 +121,8 @@ class UnretryableAnthropicError(AnthropicError):
     def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
         super().__init__(message, original_exception)
 @retry(

         from anthropic import AsyncAnthropic  # Import the async client from Anthropic SDK
         return AsyncAnthropic(api_key=self.api_key)
+    @staticmethod
+    def build_expressive_prompt(character_description: str) -> str:
         """
         Constructs and returns a prompt based solely on the provided character description.
         The returned prompt is intended to instruct Claude to generate expressive text from a character,
     def __init__(self, message: str, original_exception: Optional[Exception] = None) -> None:
         super().__init__(message, original_exception)
+        self.original_exception = original_exception
+        self.message = message
 @retry(

src/integrations/elevenlabs_api.py CHANGED Viewed

@@ -10,13 +10,6 @@ Key Features:
 - Handles received audio and processes it for playback on the web.
 - Provides detailed logging for debugging and error tracking.
 - Utilizes robust error handling (EAFP) to validate API responses.
-Classes:
-- ElevenLabsConfig: Immutable configuration for interacting with ElevenLabs' TTS API.
-- ElevenLabsError: Custom exception for ElevenLabs API-related errors.
-Functions:
-- text_to_speech_with_elevenlabs: Synthesizes speech from text using ElevenLabs' TTS API.
 """
 # Standard Library Imports
@@ -26,9 +19,9 @@ from dataclasses import dataclass, field
 from typing import Optional, Tuple
 # Third-Party Library Imports
-from elevenlabs import ElevenLabs, TextToVoiceCreatePreviewsRequestOutputFormat
 from elevenlabs.core import ApiError
-from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
 from src.config import Config, logger
@@ -48,19 +41,18 @@ class ElevenLabsConfig:
         if not self.output_format:
             raise ValueError("ElevenLabs TTS API output format is not set.")
-        # Compute the API key from the environment.
         computed_key = validate_env_var("ELEVENLABS_API_KEY")
         object.__setattr__(self, "api_key", computed_key)
     @property
-    def client(self) -> ElevenLabs:
         """
-        Lazy initialization of the ElevenLabs client.
         Returns:
-            ElevenLabs: Configured client instance.
         """
-        return ElevenLabs(api_key=self.api_key)
 class ElevenLabsError(Exception):
@@ -77,42 +69,43 @@ class UnretryableElevenLabsError(ElevenLabsError):
     def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message, original_exception)
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
-def text_to_speech_with_elevenlabs(
     character_description: str, text: str, config: Config
 ) -> Tuple[None, str]:
     """
-    Synthesizes text to speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
     Args:
-        character_description (str): The character description used as the voice description.
         text (str): The text to be synthesized into speech.
     Returns:
         Tuple[None, str]: A tuple containing:
-            - generation_id (None): We do not record the generation ID for ElevenLabs, but return None for uniformity
-                                    across TTS integrations.
-            - file_path (str): The relative file path to the audio file where the synthesized speech was saved.
     Raises:
         ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
     """
     logger.debug(f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters.")
     elevenlabs_config = config.elevenlabs_config
     try:
         # Synthesize speech using the ElevenLabs SDK
-        response = elevenlabs_config.client.text_to_voice.create_previews(
             voice_description=character_description,
             text=text,
             output_format=elevenlabs_config.output_format,
@@ -129,9 +122,10 @@ def text_to_speech_with_elevenlabs(
         generated_voice_id = preview.generated_voice_id
         base64_audio = preview.audio_base_64
         filename = f"{generated_voice_id}.mp3"
-        audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
         # Write audio to file and return the relative path
         return None, audio_file_path
     except Exception as e:

 - Handles received audio and processes it for playback on the web.
 - Provides detailed logging for debugging and error tracking.
 - Utilizes robust error handling (EAFP) to validate API responses.
 """
 # Standard Library Imports
 from typing import Optional, Tuple
 # Third-Party Library Imports
+from elevenlabs import AsyncElevenLabs, TextToVoiceCreatePreviewsRequestOutputFormat
 from elevenlabs.core import ApiError
+from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_fixed
 # Local Application Imports
 from src.config import Config, logger
         if not self.output_format:
             raise ValueError("ElevenLabs TTS API output format is not set.")
         computed_key = validate_env_var("ELEVENLABS_API_KEY")
         object.__setattr__(self, "api_key", computed_key)
     @property
+    def client(self) -> AsyncElevenLabs:
         """
+        Lazy initialization of the asynchronous ElevenLabs client.
         Returns:
+            AsyncElevenLabs: Configured async client instance.
         """
+        return AsyncElevenLabs(api_key=self.api_key)
 class ElevenLabsError(Exception):
     def __init__(self, message: str, original_exception: Optional[Exception] = None):
         super().__init__(message, original_exception)
+        self.original_exception = original_exception
+        self.message = message
 @retry(
+    retry=retry_if_exception(lambda e: not isinstance(e, UnretryableElevenLabsError)),
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
+async def text_to_speech_with_elevenlabs(
     character_description: str, text: str, config: Config
 ) -> Tuple[None, str]:
     """
+    Asynchronously synthesizes speech using the ElevenLabs TTS API, processes the audio data, and writes it to a file.
     Args:
+        character_description (str): The character description used for voice synthesis.
         text (str): The text to be synthesized into speech.
+        config (Config): Application configuration containing ElevenLabs API settings.
     Returns:
         Tuple[None, str]: A tuple containing:
+            - generation_id (None): A placeholder (no generation ID is returned).
+            - file_path (str): The relative file path to the saved audio file.
     Raises:
         ElevenLabsError: If there is an error communicating with the ElevenLabs API or processing the response.
     """
     logger.debug(f"Synthesizing speech with ElevenLabs. Text length: {len(text)} characters.")
     elevenlabs_config = config.elevenlabs_config
     try:
         # Synthesize speech using the ElevenLabs SDK
+        response = await elevenlabs_config.client.text_to_voice.create_previews(
             voice_description=character_description,
             text=text,
             output_format=elevenlabs_config.output_format,
         generated_voice_id = preview.generated_voice_id
         base64_audio = preview.audio_base_64
         filename = f"{generated_voice_id}.mp3"
         # Write audio to file and return the relative path
+        audio_file_path = save_base64_audio_to_file(base64_audio, filename, config)
         return None, audio_file_path
     except Exception as e:

src/integrations/hume_api.py CHANGED Viewed

@@ -9,13 +9,6 @@ Key Features:
 - Implements retry logic for handling transient API errors.
 - Handles received audio and processes it for playback on the web.
 - Provides detailed logging for debugging and error tracking.
-Classes:
-- HumeConfig: Immutable configuration for interacting with Hume's TTS API.
-- HumeError: Custom exception for Hume API-related errors.
-Functions:
-- text_to_speech_with_hume: Synthesizes speech from text using Hume's TTS API.
 """
 # Standard Library Imports
@@ -24,9 +17,8 @@ from dataclasses import dataclass, field
 from typing import Any, Dict, Literal, Tuple, Union
 # Third-Party Library Imports
-import requests
-from requests.exceptions import HTTPError
-from tenacity import after_log, before_log, retry, stop_after_attempt, wait_fixed
 # Local Application Imports
 from src.config import Config, logger
@@ -41,12 +33,9 @@ HumeSupportedFileFormat = Literal["mp3", "pcm", "wav"]
 class HumeConfig:
     """Immutable configuration for interacting with the Hume TTS API."""
-    # Computed fields.
     api_key: str = field(init=False)
     headers: Dict[str, str] = field(init=False)
-    # Provided fields.
-    url: str = "https://test-api.hume.ai/v0/tts/octave"
     file_format: HumeSupportedFileFormat = "mp3"
     def __post_init__(self) -> None:
@@ -56,11 +45,8 @@ class HumeConfig:
         if not self.file_format:
             raise ValueError("Hume TTS file format is not set.")
-        # Compute the API key from the environment.
         computed_api_key = validate_env_var("HUME_API_KEY")
         object.__setattr__(self, "api_key", computed_api_key)
-        # Compute the headers.
         computed_headers = {
             "X-Hume-Api-Key": f"{computed_api_key}",
             "Content-Type": "application/json",
@@ -83,38 +69,36 @@ class UnretryableHumeError(HumeError):
     def __init__(self, message: str, original_exception: Union[Exception, None] = None):
         super().__init__(message, original_exception)
         self.original_exception = original_exception
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
-def text_to_speech_with_hume(
     character_description: str,
     text: str,
     num_generations: int,
     config: Config,
 ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
     """
-    Synthesizes text to speech using the Hume TTS API, processes audio data, and writes audio to a file.
-    This function sends a POST request to the Hume TTS API with a character description and text
-    to be converted to speech. Depending on the specified number of generations (allowed values: 1 or 2),
-    the API returns one or two generations. For each generation, the function extracts the base64-encoded
-    audio and the generation ID, saves the audio as an MP3 file via the `save_base64_audio_to_file` helper,
-    and returns the relevant details.
     Args:
-        character_description (str): A description of the character, which is used as contextual input
-            for generating the voice.
-        text (str): The text to be converted to speech.
-        num_generations (int): The number of audio generations to request from the API.
-            Allowed values are 1 or 2. If 1, only a single generation is processed; if 2, a second
-            generation is expected in the API response.
-        config (Config): The application configuration containing Hume API settings.
     Returns:
         Union[Tuple[str, str], Tuple[str, str, str, str]]:
@@ -123,15 +107,13 @@ def text_to_speech_with_hume(
     Raises:
         ValueError: If num_generations is not 1 or 2.
-        HumeError: If there is an error communicating with the Hume TTS API or parsing its response.
-        UnretryableHumeError: If a client-side HTTP error (status code in the 4xx range) is encountered.
-        Exception: Any other exceptions raised during the request or processing will be wrapped and
-                   re-raised as HumeError.
     """
     logger.debug(
-        f"Processing TTS with Hume. Prompt length: {len(character_description)} characters. "
-        f"Text length: {len(text)} characters."
     )
     if num_generations < 1 or num_generations > 2:
@@ -145,14 +127,15 @@ def text_to_speech_with_hume(
     }
     try:
-        # Synthesize speech using the Hume TTS API
-        response = requests.post(
-            url=hume_config.url,
-            headers=hume_config.headers,
-            json=request_body,
-        )
-        response.raise_for_status()
-        response_data = response.json()
         generations = response_data.get("generations")
         if not generations:
@@ -160,7 +143,6 @@ def text_to_speech_with_hume(
             logger.error(msg)
             raise HumeError(msg)
-        # Extract the base64 encoded audio and generation ID from the generation.
         generation_a = generations[0]
         generation_a_id, audio_a_path = _parse_hume_tts_generation(generation_a, config)
@@ -171,48 +153,51 @@ def text_to_speech_with_hume(
         generation_b_id, audio_b_path = _parse_hume_tts_generation(generation_b, config)
         return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
-    except Exception as e:
-        if (
-            isinstance(e, HTTPError)
-            and e.response is not None
-            and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE
-        ):
             raise UnretryableHumeError(
-                message=f"{e.response.text}",
                 original_exception=e,
             ) from e
         raise HumeError(
-            message=f"{e}",
             original_exception=e,
         ) from e
 def _parse_hume_tts_generation(generation: Dict[str, Any], config: Config) -> Tuple[str, str]:
     """
-    Parse a Hume TTS generation response and save the decoded audio as an MP3 file.
-    This function extracts the generation ID and the base64-encoded audio from the provided
-    dictionary. It then decodes and saves the audio data to an MP3 file, naming the file using
-    the generation ID. Finally, it returns a tuple containing the generation ID and the file path
-    of the saved audio.
     Args:
-        generation (Dict[str, Any]): A dictionary representing the TTS generation response from Hume.
-            Expected keys are:
-                - "generation_id" (str): A unique identifier for the generated audio.
-                - "audio" (str): A base64 encoded string of the audio data.
-        config (Config): The application configuration used for saving the audio file.
     Returns:
-        Tuple[str, str]: A tuple containing:
-            - generation_id (str): The unique identifier for the audio generation.
-            - audio_path (str): The filesystem path where the audio file was saved.
     Raises:
-        KeyError: If the "generation_id" or "audio" key is missing from the generation dictionary.
-        Exception: Propagates any exceptions raised by save_base64_audio_to_file, such as errors during
-                   the decoding or file saving process.
     """
     generation_id = generation.get("generation_id")
     if generation_id is None:

 - Implements retry logic for handling transient API errors.
 - Handles received audio and processes it for playback on the web.
 - Provides detailed logging for debugging and error tracking.
 """
 # Standard Library Imports
 from typing import Any, Dict, Literal, Tuple, Union
 # Third-Party Library Imports
+import httpx
+from tenacity import after_log, before_log, retry, retry_if_exception, stop_after_attempt, wait_fixed
 # Local Application Imports
 from src.config import Config, logger
 class HumeConfig:
     """Immutable configuration for interacting with the Hume TTS API."""
     api_key: str = field(init=False)
     headers: Dict[str, str] = field(init=False)
+    url: str = "https://api.hume.ai/v0/tts/octave"
     file_format: HumeSupportedFileFormat = "mp3"
     def __post_init__(self) -> None:
         if not self.file_format:
             raise ValueError("Hume TTS file format is not set.")
         computed_api_key = validate_env_var("HUME_API_KEY")
         object.__setattr__(self, "api_key", computed_api_key)
         computed_headers = {
             "X-Hume-Api-Key": f"{computed_api_key}",
             "Content-Type": "application/json",
     def __init__(self, message: str, original_exception: Union[Exception, None] = None):
         super().__init__(message, original_exception)
         self.original_exception = original_exception
+        self.message = message
 @retry(
+    retry=retry_if_exception(lambda e: not isinstance(e, UnretryableHumeError)),
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
     before=before_log(logger, logging.DEBUG),
     after=after_log(logger, logging.DEBUG),
     reraise=True,
 )
+async def text_to_speech_with_hume(
     character_description: str,
     text: str,
     num_generations: int,
     config: Config,
 ) -> Union[Tuple[str, str], Tuple[str, str, str, str]]:
     """
+    Asynchronously synthesizes speech using the Hume TTS API, processes audio data, and writes audio to a file.
+    This function sends a POST request to the Hume TTS API with a character description and text to be converted to
+    speech. Depending on the specified number of generations (1 or 2), the API returns one or two generations.
+    For each generation, the function extracts the base64-encoded audio and generation ID, saves the audio as an MP3
+    file, and returns the relevant details.
     Args:
+        character_description (str): Description used for voice synthesis.
+        text (str): Text to be converted to speech.
+        num_generations (int): Number of audio generations to request (1 or 2).
+        config (Config): Application configuration containing Hume API settings.
     Returns:
         Union[Tuple[str, str], Tuple[str, str, str, str]]:
     Raises:
         ValueError: If num_generations is not 1 or 2.
+        HumeError: For errors communicating with the Hume API.
+        UnretryableHumeError: For client-side HTTP errors (status code 4xx).
     """
     logger.debug(
+        "Processing TTS with Hume. "
+        f"Character description length: {len(character_description)}. "
+        f"Text length: {len(text)}."
     )
     if num_generations < 1 or num_generations > 2:
     }
     try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(
+                url=hume_config.url,
+                headers=hume_config.headers,
+                json=request_body,
+                timeout=30.0,
+            )
+            response.raise_for_status()
+            response_data = response.json()
         generations = response_data.get("generations")
         if not generations:
             logger.error(msg)
             raise HumeError(msg)
         generation_a = generations[0]
         generation_a_id, audio_a_path = _parse_hume_tts_generation(generation_a, config)
         generation_b_id, audio_b_path = _parse_hume_tts_generation(generation_b, config)
         return (generation_a_id, audio_a_path, generation_b_id, audio_b_path)
+    except httpx.ReadTimeout as e:
+        # Handle timeout specifically
+        raise HumeError(
+            message="Request to Hume API timed out. Please try again later.",
+            original_exception=e,
+        ) from e
+    except httpx.HTTPStatusError as e:
+        if e.response is not None and CLIENT_ERROR_CODE <= e.response.status_code < SERVER_ERROR_CODE:
+            error_message = f"HTTP Error {e.response.status_code}: {e.response.text}"
+            logger.error(error_message)
             raise UnretryableHumeError(
+                message=error_message,
                 original_exception=e,
             ) from e
+        error_message = f"HTTP Error {e.response.status_code if e.response else 'unknown'}"
+        logger.error(error_message)
         raise HumeError(
+            message=error_message,
             original_exception=e,
         ) from e
+    except Exception as e:
+        error_type = type(e).__name__
+        error_message = str(e) if str(e) else f"An error of type {error_type} occurred"
+        logger.error("Error during Hume API call: %s - %s", error_type, error_message)
+        raise HumeError(
+            message=error_message,
+            original_exception=e,
+        ) from e
 def _parse_hume_tts_generation(generation: Dict[str, Any], config: Config) -> Tuple[str, str]:
     """
+    Parses a Hume TTS generation response and saves the decoded audio as an MP3 file.
     Args:
+        generation (Dict[str, Any]): TTS generation response containing 'generation_id' and 'audio'.
+        config (Config): Application configuration for saving the audio file.
     Returns:
+        Tuple[str, str]: (generation_id, audio_path)
     Raises:
+        KeyError: If expected keys are missing.
+        Exception: Propagates exceptions from saving the audio file.
     """
     generation_id = generation.get("generation_id")
     if generation_id is None:

src/main.py CHANGED Viewed

@@ -4,16 +4,26 @@ main.py
 This module is the entry point for the app. It loads configuration and starts the Gradio app.
 """
 # Local Application Imports
 from src.app import App
 from src.config import Config, logger
 from src.database import init_db
-if __name__ == "__main__":
     config = Config.get()
     logger.info("Launching TTS Arena Gradio app...")
     db_session_maker = init_db(config)
     app = App(config, db_session_maker)
     demo = app.build_gradio_interface()
-    init_db(config)
     demo.launch(server_name="0.0.0.0", allowed_paths=[str(config.audio_dir)])

 This module is the entry point for the app. It loads configuration and starts the Gradio app.
 """
+# Standard Library Imports
+import asyncio
 # Local Application Imports
 from src.app import App
 from src.config import Config, logger
 from src.database import init_db
+async def main():
+    """
+    Asynchronous main function to initialize the application.
+    """
     config = Config.get()
     logger.info("Launching TTS Arena Gradio app...")
     db_session_maker = init_db(config)
     app = App(config, db_session_maker)
     demo = app.build_gradio_interface()
     demo.launch(server_name="0.0.0.0", allowed_paths=[str(config.audio_dir)])
+if __name__ == "__main__":
+    asyncio.run(main())

uv.lock CHANGED Viewed

@@ -262,7 +262,6 @@ dependencies = [
     { name = "greenlet" },
     { name = "httpx" },
     { name = "python-dotenv" },
-    { name = "requests" },
     { name = "sqlalchemy" },
     { name = "tenacity" },
 ]
@@ -287,7 +286,6 @@ requires-dist = [
     { name = "greenlet", specifier = ">=2.0.0" },
     { name = "httpx", specifier = ">=0.24.1" },
     { name = "python-dotenv", specifier = ">=1.0.1" },
-    { name = "requests", specifier = ">=2.32.3" },
     { name = "sqlalchemy", specifier = ">=2.0.0" },
     { name = "tenacity", specifier = ">=9.0.0" },
 ]

     { name = "greenlet" },
     { name = "httpx" },
     { name = "python-dotenv" },
     { name = "sqlalchemy" },
     { name = "tenacity" },
 ]
     { name = "greenlet", specifier = ">=2.0.0" },
     { name = "httpx", specifier = ">=0.24.1" },
     { name = "python-dotenv", specifier = ">=1.0.1" },
     { name = "sqlalchemy", specifier = ">=2.0.0" },
     { name = "tenacity", specifier = ">=9.0.0" },
 ]