import gradio as gr
import tempfile
import uuid
import os
from kittentts import KittenTTS
import soundfile as sf

# Load the KittenTTS checkpoint once at import time; the handlers below reuse
# this single module-level instance
_MODEL_REPO_ID = "KittenML/kitten-tts-nano-0.1"
model = KittenTTS(_MODEL_REPO_ID)

def generate_speech(text, voice, speed):
    """
    Generate speech from text using KittenTTS and save it as a WAV file.

    Args:
        text (str): Text to convert to speech
        voice (str): Voice to use for generation
        speed (float): Speed of speech generation

    Returns:
        str | None: Path to the generated WAV file, or None when ``text`` is
        empty or whitespace-only (a warning is shown in the UI instead).

    Raises:
        gr.Error: If synthesis or writing the audio file fails, so the
            failure is reported to the user instead of being dropped.
    """
    # This handler is wired to a single Audio output, so it must return
    # exactly one value. Returning a (None, message) tuple would be handed to
    # the Audio component as if it were (sample_rate, data).
    if not text or not text.strip():
        gr.Warning("Please enter some text to generate speech.")
        return None

    try:
        audio = model.generate(text, voice=voice, speed=speed)

        # A UUID in the file name keeps concurrent requests from overwriting
        # each other's output. The file is not removed by this function.
        output_path = os.path.join(
            tempfile.gettempdir(), f"kitten_tts_{uuid.uuid4()}.wav"
        )

        # 24000 Hz matches the "24kHz audio output" the app advertises
        sf.write(output_path, audio, 24000)
    except Exception as e:
        # Surface the failure in the UI rather than silently returning None
        raise gr.Error(f"Speech generation failed: {e}") from e

    return output_path

def get_available_voices():
    """
    Get the list of available voices from the model.

    Returns:
        The value of ``model.available_voices`` when it is non-empty;
        otherwise a one-element list holding the default voice
        ``"expr-voice-5-m"``. The default is also returned when reading the
        attribute fails.
    """
    fallback = ["expr-voice-5-m"]  # Default voice as fallback
    try:
        voices = model.available_voices
    except Exception:
        # Best-effort lookup: any ordinary failure falls back to the default
        # voice. Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return fallback
    return voices if voices else fallback

# Resolve the voice list once at import time; the voice dropdown and the
# example rows below are built from this module-level value
available_voices = get_available_voices()

def _voice_at(index):
    """
    Return the voice at ``index`` in ``available_voices``, wrapping around.

    The voice list may hold a single fallback entry, so fixed indices such as
    2 or 5 cannot be used directly without risking an IndexError at import
    time. With a list long enough for the requested index the result is the
    same as plain indexing.
    """
    if not available_voices:
        return "expr-voice-5-m"
    return available_voices[index % len(available_voices)]


# Create Gradio interface
with gr.Blocks(title="KittenTTS - Text to Speech", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🐱 KittenTTS - Text to Speech Generator")
    gr.Markdown("Convert your text to high-quality speech using KittenTTS nano model!")
    
    with gr.Row():
        with gr.Column(scale=2):
            # Input components
            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech...",
                lines=4,
                max_lines=10
            )
            
            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=_voice_at(0),
                    label="Voice Selection",
                    info="Choose the voice for speech generation"
                )
                
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.01,
                    value=1.25,
                    label="Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)"
                )
            
            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
            
        with gr.Column(scale=1):
            # Output components
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
                autoplay=True
            )
    
    # Example inputs; voices are picked via _voice_at so a short voice list
    # (e.g. the single-entry fallback) cannot raise IndexError here
    gr.Markdown("## 📝 Example Texts")
    examples = gr.Examples(
        examples=[
            ["Hello! This is a test of the KittenTTS model.", _voice_at(2), 1.25],
            ["The quick brown fox jumps over the lazy dog.", _voice_at(1), 1.5],
            ["Welcome to the world of high-quality text-to-speech synthesis!", _voice_at(5), 1],
        ],
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
        fn=generate_speech,
        label="Click on an example to try it out",
        cache_examples="lazy"
    )
    
    # Model information
    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown("""
        **Model:** KittenML/kitten-tts-nano-0.1
        
        **Features:**
        - High-quality text-to-speech synthesis
        - Works without GPU acceleration
        - Multiple voice options
        - Adjustable speech speed
        - 24kHz audio output
        
        **Usage:**
        1. Enter your text in the text box
        2. Select a voice from the dropdown
        3. Adjust the speech speed if needed
        4. Click "Generate Speech" to create audio
        
        Generated files are saved in temporary directory with unique UUID filenames.
        """)
    
    # Event handlers: the button click and pressing Enter in the text box
    # both run the same generation function into the single audio output
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output]
    )
    
    # Auto-generate on Enter key (optional)
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output]
    )

# Start the server only when this file is executed as a script
if __name__ == "__main__":
    # Enable the request queue first, then serve the UI
    app.queue(default_concurrency_limit=100)
    app.launch()