# File-viewer header residue: file size 3,273 bytes, revision a1e47c1.
import numpy as np
from mcp.server import FastMCP
from pydantic import Field
from aworld.utils import import_package
from aworld.logs.util import logger
# Import required packages
# NOTE(review): import_package comes from aworld.utils; presumably it makes
# sure each package is importable (installing `install_name` when missing)
# before the plain imports below run -- confirm against its implementation.
import_package('gtts', install_name='gTTS')
import_package('pyttsx3', install_name='pyttsx3')
import_package('librosa', install_name='librosa')
import_package('soundfile', install_name='soundfile')
# These imports must stay below the import_package calls above.
import pyttsx3
from gtts import gTTS
import librosa
import soundfile as sf
# MCP server instance named "text_to_audio"; the tool below is registered on
# it through the @mcp.tool() decorator.
mcp = FastMCP("text_to_audio")
@mcp.tool()
def convert_text_to_audio(
    text: str = Field(description="Text to convert to audio"),
    output_file: str = Field(description="Path to the generated audio file")
) -> str:
    """Convert input text to speech audio with child-friendly settings.

    The text is first padded with pauses by ``_preprocess_text``, then
    synthesized with gTTS using its slow speaking rate, saved to
    ``output_file`` and finally handed to ``_post_process_audio``.

    Args:
        text: Input text to convert.
        output_file: Path the generated audio is written to.

    Returns:
        str: Path to the generated audio file (the ``output_file`` argument).

    Raises:
        Exception: Any error raised while preprocessing, synthesizing or
            saving is logged and re-raised unchanged.
    """
    # No pyttsx3 engine is created here: synthesis goes through gTTS only,
    # and initializing an unused engine is wasted work that can itself fail
    # on hosts without a local speech driver.
    try:
        # Preprocess text for child-friendly output (extra pauses).
        spoken_text = _preprocess_text(text)

        # Plain "en" is used instead of a regional tag such as "en-US":
        # recent gTTS releases treat regional English tags as deprecated or
        # unsupported, while "en" is accepted by every release.
        # slow=True selects gTTS's slower reading speed.
        tts = gTTS(text=spoken_text, lang="en", slow=True)
        tts.save(output_file)

        # Post-process audio (best effort; it logs its own failures).
        _post_process_audio(output_file)
        return output_file
    except Exception as e:
        logger.error("Error in text-to-audio conversion: %s", str(e))
        raise
def _preprocess_text(text: str) -> str:
    """Insert pause markers into text before it is synthesized.

    Two literal substitutions are applied, in this order:

    - every ``". "`` becomes ``"... "`` (a slight pause between sentences)
    - every ``"!"`` becomes ``"! ... "`` (a pause after an exclamation)

    Args:
        text: Raw input text.

    Returns:
        The text with the substitutions applied.
    """
    # Order matters: the second rule emits ". " sequences that must not be
    # expanded again by the first one.
    pause_rules = (
        ('. ', '... '),
        ('!', '! ... '),
    )
    for marker, padded in pause_rules:
        text = text.replace(marker, padded)
    return text
def _post_process_audio(audio_file: str) -> None:
    """Resample and peak-normalize an audio file in place (best effort).

    The file is decoded with librosa as 16 kHz mono, scaled so that its
    loudest sample has magnitude 1.0, and written back to the same path as
    MP3. Failures are logged as warnings and never propagated.

    Args:
        audio_file: Path of the audio file to rewrite.
    """
    try:
        # libsndfile has no 'mp4' format, so the previous format='mp4' /
        # subtype='MP4' request could never succeed. MP3 is written instead;
        # support depends on the installed libsndfile, so check before
        # touching the file.
        if 'MP3' not in sf.available_formats():
            raise ValueError("MP3 output is not supported by the installed libsndfile")

        # Load with a lower sample rate and mono channel
        samples, sample_rate = librosa.load(audio_file, sr=16000, mono=True)

        # Peak normalization. np.max raises ValueError on empty audio, which
        # is reported below; an all-silent (or non-finite) peak is left
        # unscaled so the division cannot produce NaN/inf samples.
        peak = float(np.max(np.abs(samples)))
        if peak > 0.0:
            samples = samples / peak

        # The explicit format makes the write independent of the file
        # extension chosen by the caller.
        sf.write(audio_file, samples, sample_rate, format='MP3')
    except (IOError, ValueError, RuntimeError) as e:
        logger.warning("Audio post-processing failed: %s", e)
# Script entry point
if __name__ == "__main__":
    # Serve the registered tools over the SSE transport on port 8888.
    mcp.settings.port = 8888
    mcp.run(transport="sse")

    # Disabled manual smoke test that calls the tool function directly:
    #   text = "Hello, this is a test of the text-to-audio conversion."
    #   output_file = "output1.mp4"
    #   print(f"Converting text to audio: {text}")
    #   audio_file = convert_text_to_audio(text, output_file)
    #   print(f"Audio file saved to: {audio_file}")
|