"""Gradio web UI that turns text into speech with the KittenTTS nano model."""

import gradio as gr
import tempfile
import uuid
import os
from kittentts import KittenTTS
import soundfile as sf

# Voice used whenever the model cannot report its own voice list.
DEFAULT_VOICE = "expr-voice-5-m"

# Sample rate (Hz) used when writing the generated waveform to disk.
SAMPLE_RATE = 24000

# Initialize the TTS model once at import time so every request reuses it.
model = KittenTTS("KittenML/kitten-tts-nano-0.1")


def generate_speech(text, voice, speed):
    """
    Generate speech from text using KittenTTS.

    Args:
        text (str): Text to convert to speech.
        voice (str): Voice to use for generation.
        speed (float): Speed of speech generation.

    Returns:
        str | None: Path to the generated WAV file, or None when no text
        was supplied (a warning is shown to the user in that case).

    Raises:
        gr.Error: If the model fails to generate audio or the file cannot
            be written; Gradio displays the message in the UI.
    """
    # Every handler binds exactly one output (the audio player), so a single
    # value must be returned; the user-facing hint goes through gr.Warning.
    if not text or not text.strip():
        gr.Warning("Please enter some text to generate speech.")
        return None

    try:
        # Generate audio
        audio = model.generate(text, voice=voice, speed=speed)

        # A UUID in the file name keeps concurrent requests from colliding.
        output_path = os.path.join(
            tempfile.gettempdir(), f"kitten_tts_{uuid.uuid4()}.wav"
        )

        # Save audio file
        sf.write(output_path, audio, SAMPLE_RATE)
    except Exception as exc:
        # Surface the failure in the UI instead of silently returning nothing.
        raise gr.Error(f"Speech generation failed: {exc}") from exc

    return output_path


def get_available_voices():
    """
    Get the list of available voices from the model.

    Returns:
        The model's ``available_voices`` value when it is non-empty,
        otherwise a one-element list containing the default voice.
    """
    try:
        voices = model.available_voices
    except Exception:
        # Best effort: fall back to the default voice if the model cannot
        # report its voices, without trapping KeyboardInterrupt/SystemExit.
        return [DEFAULT_VOICE]
    return voices if voices else [DEFAULT_VOICE]


# Get available voices
available_voices = get_available_voices()


def _example_voice(index):
    """Return the voice at ``index``, clamped to the last available voice."""
    if not available_voices:
        return DEFAULT_VOICE
    # Clamp so a short voice list (e.g. the one-element fallback) cannot
    # raise IndexError while the examples are being built.
    return available_voices[min(index, len(available_voices) - 1)]


# Create Gradio interface
with gr.Blocks(title="KittenTTS - Text to Speech", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🐱 KittenTTS - Text to Speech Generator")
    gr.Markdown("Convert your text to high-quality speech using KittenTTS nano model!")

    with gr.Row():
        with gr.Column(scale=2):
            # Input components
            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech...",
                lines=4,
                max_lines=10,
            )

            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=_example_voice(0),
                    label="Voice Selection",
                    info="Choose the voice for speech generation",
                )

                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.01,
                    value=1.25,
                    label="Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)",
                )

            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

        with gr.Column(scale=1):
            # Output components
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
                autoplay=True,
            )

    # Example inputs
    gr.Markdown("## 📝 Example Texts")
    examples = gr.Examples(
        examples=[
            ["Hello! This is a test of the KittenTTS model.", _example_voice(2), 1.25],
            ["The quick brown fox jumps over the lazy dog.", _example_voice(1), 1.5],
            [
                "Welcome to the world of high-quality text-to-speech synthesis!",
                _example_voice(5),
                1,
            ],
        ],
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
        fn=generate_speech,
        label="Click on an example to try it out",
        # "lazy" defers running an example until the first time it is clicked.
        cache_examples="lazy",
    )

    # Model information
    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown("""
        **Model:** KittenML/kitten-tts-nano-0.1

        **Features:**
        - High-quality text-to-speech synthesis
        - Works without GPU acceleration
        - Multiple voice options
        - Adjustable speech speed
        - 24kHz audio output

        **Usage:**
        1. Enter your text in the text box
        2. Select a voice from the dropdown
        3. Adjust the speech speed if needed
        4. Click "Generate Speech" to create audio

        Generated files are saved in temporary directory with unique UUID filenames.
        """)

    # Event handlers
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
    )

    # Auto-generate on Enter key (optional)
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
    )

# Launch the app
if __name__ == "__main__":
    app.queue(default_concurrency_limit=100).launch()