import os
from typing import Optional

import requests

from utils.config import config


class TTSService:
    """Service for converting text to speech via the Hugging Face Inference API.

    Synthesis is a two-step pipeline: the text is posted to ``tts_model`` and
    the raw bytes of that response are then posted to ``vocoder_model``; the
    vocoder's response body is returned to the caller.
    """

    # Base URL shared by both model endpoints.
    api_base_url = "https://api-inference.huggingface.co/models"

    # Seconds to wait on each HTTP request. Without a timeout ``requests``
    # waits forever, so a stalled endpoint would hang the caller.
    request_timeout = 60

    def __init__(self):
        """Read the API token from the project config and pick the models."""
        self.hf_token = config.hf_token
        self.tts_model = "facebook/fastspeech2-en-ljspeech"
        self.vocoder_model = "facebook/hifigan-universal"

    def _model_url(self, model: str) -> str:
        """Return the inference endpoint URL for *model*."""
        return f"{self.api_base_url}/{model}"

    def synthesize_speech(self, text: str) -> Optional[bytes]:
        """
        Convert text to speech using Hugging Face API

        Args:
            text: Text to convert to speech

        Returns:
            Audio bytes or None if failed (missing token, empty text,
            non-200 response from either model, or any raised exception)
        """
        if not self.hf_token:
            print("Hugging Face token not configured for TTS")
            return None

        # Nothing to synthesize; skip two pointless network round-trips.
        if not text or not text.strip():
            print("No text provided for TTS")
            return None

        try:
            # First, generate speech with text-to-speech model
            tts_headers = {
                "Authorization": f"Bearer {self.hf_token}"
            }
            tts_payload = {
                "inputs": text
            }

            tts_response = requests.post(
                self._model_url(self.tts_model),
                headers=tts_headers,
                json=tts_payload,
                timeout=self.request_timeout,
            )

            if tts_response.status_code != 200:
                print(f"TTS model error: {tts_response.status_code} - {tts_response.text}")
                return None

            # Then, convert to audio with vocoder: the first model's raw
            # response body is forwarded unchanged as the request body.
            vocoder_response = requests.post(
                self._model_url(self.vocoder_model),
                headers=tts_headers,
                data=tts_response.content,
                timeout=self.request_timeout,
            )

            if vocoder_response.status_code != 200:
                print(f"Vocoder error: {vocoder_response.status_code} - {vocoder_response.text}")
                return None

            return vocoder_response.content

        except Exception as e:
            # Deliberate best-effort boundary: any failure (including a
            # request timeout) is reported and surfaced to callers as None.
            print(f"Error synthesizing speech: {e}")
            return None

    def save_audio_file(self, text: str, filename: str) -> bool:
        """
        Synthesize speech and save to file

        Args:
            text: Text to convert to speech
            filename: Output filename (.wav)

        Returns:
            Boolean indicating success; False when synthesis produced no
            audio or the file could not be written
        """
        audio_data = self.synthesize_speech(text)
        if not audio_data:
            return False

        try:
            with open(filename, 'wb') as f:
                f.write(audio_data)
        except Exception as e:
            print(f"Error saving audio file: {e}")
            return False
        return True


# Global TTS service instance
tts_service = TTSService()