|
import os |
|
import requests |
|
from typing import Optional |
|
from utils.config import config |
|
|
|
class TTSService:
    """Service for converting text to speech via the Hugging Face API.

    Synthesis is a two-step pipeline: the text is posted to ``tts_model`` and
    the raw body of that response is then posted to ``vocoder_model``. The
    body of the vocoder response is what callers receive as audio.

    Failures are reported with ``print`` and signalled through the return
    value (``None`` / ``False``) rather than by raising.
    """

    # Base URL of the hosted inference endpoint; the model id is appended.
    API_BASE_URL = "https://api-inference.huggingface.co/models"

    # Seconds to wait on each HTTP request. ``requests`` applies no timeout by
    # default, so without this a stalled connection would block forever.
    REQUEST_TIMEOUT = 60

    def __init__(self):
        """Read the API token from the project config and select the models."""
        self.hf_token = config.hf_token
        self.tts_model = "facebook/fastspeech2-en-ljspeech"
        self.vocoder_model = "facebook/hifigan-universal"

    def _post_to_model(self, model: str, **body):
        """POST ``body`` to the endpoint of ``model`` with the bearer token.

        Args:
            model: Model id appended to ``API_BASE_URL``.
            **body: Payload keyword arguments forwarded to ``requests.post``
                (``json=...`` or ``data=...``).

        Returns:
            The ``requests`` response object.
        """
        return requests.post(
            f"{self.API_BASE_URL}/{model}",
            headers={"Authorization": f"Bearer {self.hf_token}"},
            timeout=self.REQUEST_TIMEOUT,
            **body,
        )

    def synthesize_speech(self, text: str) -> Optional[bytes]:
        """
        Convert text to speech using Hugging Face API

        Args:
            text: Text to convert to speech

        Returns:
            Audio bytes or None if failed (missing token, empty text,
            non-200 response from either model, or any raised error)
        """
        if not self.hf_token:
            print("Hugging Face token not configured for TTS")
            return None

        # Nothing to synthesize: skip the two network round-trips entirely.
        if not text or (isinstance(text, str) and not text.strip()):
            print("No text provided for TTS")
            return None

        try:
            tts_response = self._post_to_model(
                self.tts_model, json={"inputs": text}
            )
            if tts_response.status_code != 200:
                print(f"TTS model error: {tts_response.status_code} - {tts_response.text}")
                return None

            # The first model's raw response body is the vocoder's input.
            vocoder_response = self._post_to_model(
                self.vocoder_model, data=tts_response.content
            )
            if vocoder_response.status_code != 200:
                print(f"Vocoder error: {vocoder_response.status_code} - {vocoder_response.text}")
                return None

            return vocoder_response.content

        except Exception as e:
            # Deliberately broad: callers rely on "None on any failure"
            # (connection errors, timeouts, ...) instead of exceptions.
            print(f"Error synthesizing speech: {e}")
            return None

    def save_audio_file(self, text: str, filename: str) -> bool:
        """
        Synthesize speech and save to file

        Args:
            text: Text to convert to speech
            filename: Output filename (.wav)

        Returns:
            Boolean indicating success; False when synthesis produced no
            audio or the file could not be written
        """
        audio_data = self.synthesize_speech(text)
        if not audio_data:
            # Synthesis failed (already reported) or returned empty audio.
            return False

        try:
            with open(filename, "wb") as f:
                f.write(audio_data)
        except Exception as e:
            # Best-effort contract: report the problem and signal failure.
            print(f"Error saving audio file: {e}")
            return False

        return True
|
|
|
|
|
# Shared module-level service instance, constructed once at import time.
tts_service = TTSService()
|
|