# Spaces status at capture time: Sleeping
import numpy as np | |
from mcp.server import FastMCP | |
from pydantic import Field | |
from aworld.utils import import_package | |
from aworld.logs.util import logger | |
# Import required packages | |
import_package('gtts', install_name='gTTS') | |
import_package('pyttsx3', install_name='pyttsx3') | |
import_package('librosa', install_name='librosa') | |
import_package('soundfile', install_name='soundfile') | |
import pyttsx3 | |
from gtts import gTTS | |
import librosa | |
import soundfile as sf | |
# Name under which this MCP server is created.
_SERVER_NAME = "text_to_audio"

# Module-level server instance; started from the ``__main__`` guard below.
mcp = FastMCP(_SERVER_NAME)
def convert_text_to_audio(
    text: str = Field(description="Text to convert to audio"),
    output_file: str = Field(description="Path to the generated audio file"),
) -> str:
    """Convert input text to a child-friendly speech recording.

    The text is passed through ``_preprocess_text`` to insert pauses,
    synthesized with gTTS in slow mode, saved to ``output_file`` and then
    handed to ``_post_process_audio``.

    Args:
        text: Input text to convert. Must contain at least one
            non-whitespace character.
        output_file: Path the generated audio is written to.

    Returns:
        str: ``output_file``, the path of the generated audio file.

    Raises:
        ValueError: If ``text`` is empty or only whitespace.
        Exception: Any error raised while synthesizing or saving is logged
            and re-raised unchanged.
    """
    # NOTE(review): no ``@mcp.tool`` registration is visible on this function
    # in this view -- confirm it is registered with ``mcp`` somewhere.
    try:
        # Fail with a clear message instead of relying on an assertion
        # inside the TTS library.
        if not text or not text.strip():
            raise ValueError("Text to convert must not be empty")

        # Preprocess text for child-friendly output
        spoken_text = _preprocess_text(text)

        # gTTS is the only engine used for synthesis. No local pyttsx3 engine
        # is initialised here: nothing was ever spoken through it, and
        # creating it can fail on hosts without a speech driver.
        # "en" is the plain English tag in gTTS's language table.
        # NOTE(review): regional tags such as "en-US" appear to be
        # deprecated/rejected by recent gTTS releases -- confirm against the
        # installed version.
        tts = gTTS(text=spoken_text, lang="en", slow=True)
        tts.save(output_file)

        # Post-process audio (resample and normalize volume); best-effort.
        _post_process_audio(output_file)
        return output_file
    except Exception as e:
        logger.error("Error in text-to-audio conversion: %s", str(e))
        raise
def _preprocess_text(text: str) -> str: | |
"""Preprocess text for child-friendly output. | |
- Add pauses between sentences | |
- Emphasize important words | |
- Handle special characters | |
""" | |
# Add slight pauses between sentences | |
text = text.replace('. ', '... ') | |
# Add emphasis on important words (can be customized) | |
text = text.replace('!', '! ... ') | |
return text | |
def _post_process_audio(audio_file: str) -> None:
    """Resample, peak-normalize and rewrite an audio file in place.

    The file is loaded as 16 kHz mono, scaled so that its largest absolute
    sample is 1.0, and written back to the same path. This step is
    best-effort: I/O, value and runtime errors are logged as warnings and
    swallowed so that a failed clean-up does not abort the conversion.

    Args:
        audio_file: Path of the audio file to process; overwritten on success.
    """
    try:
        # Load with a lower sample rate and mono channel
        samples, sample_rate = librosa.load(audio_file, sr=16000, mono=True)

        if samples.size == 0:
            logger.warning(
                "Audio post-processing skipped: no samples in %s", audio_file
            )
            return

        # Peak-normalize, but leave pure silence untouched: dividing by a
        # zero peak would fill the signal with NaN values.
        peak = float(np.max(np.abs(samples)))
        if peak > 0.0:
            samples = samples / peak

        # 'mp4'/'MP4' are not soundfile format/subtype names, so that request
        # was rejected with ValueError before anything was written. The file
        # handed in by the caller is produced by gTTS, which saves MP3 data,
        # so write MP3 back.
        # NOTE(review): MP3 output needs a libsndfile build with MP3 support;
        # without it soundfile raises ValueError, which is logged below.
        sf.write(audio_file, samples, sample_rate, format='MP3')
    except (IOError, ValueError, RuntimeError) as e:
        logger.warning("Audio post-processing failed: %s", e)
def main() -> None:
    """Start the MCP server on port 8888 using the SSE transport."""
    mcp.settings.port = 8888
    mcp.run(transport='sse')


# Script entry point
if __name__ == "__main__":
    main()
# Manual smoke test (kept commented out): converts one sentence directly
# instead of starting the server.
# text = "Hello, this is a test of the text-to-audio conversion."
# output_file = "output1.mp4"
# print(f"Converting text to audio: {text}")
# audio_file = convert_text_to_audio(text, output_file)
# print(f"Audio file saved to: {audio_file}")