Spaces:
Running
Running
File size: 4,971 Bytes
fd8eada aed6b70 fd8eada aed6b70 fd8eada 3ab6181 fd8eada aed6b70 3ab6181 fd8eada 3ab6181 fd8eada aed6b70 3ab6181 aed6b70 fd8eada a71a89a aed6b70 532394f fd8eada aed6b70 fd8eada aed6b70 fd8eada 4da9e15 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
import gradio as gr
import tempfile
import uuid
import os
from kittentts import KittenTTS
import soundfile as sf
# Load the TTS model once at import time; generate_speech() and
# get_available_voices() both read this module-level instance.
_MODEL_REPO_ID = "KittenML/kitten-tts-nano-0.1"
model = KittenTTS(_MODEL_REPO_ID)
def generate_speech(text, voice, speed):
    """
    Generate speech from text using KittenTTS and save it as a WAV file.

    Args:
        text (str): Text to convert to speech.
        voice (str): Voice to use for generation.
        speed (float): Speed of speech generation.

    Returns:
        str | None: Path to the generated audio file, or None when the text
        is missing/blank or when generation fails.
    """
    # Always return exactly one value: this function is wired to a single
    # output component, so a (None, message) tuple does not match it.
    if not text or not text.strip():
        return None

    try:
        # Generate audio
        audio = model.generate(text, voice=voice, speed=speed)

        # A UUID-based file name in the system temp directory gives every
        # request its own output file.
        output_path = os.path.join(
            tempfile.gettempdir(), f"kitten_tts_{uuid.uuid4()}.wav"
        )

        # Save audio file at a 24 kHz sample rate
        sf.write(output_path, audio, 24000)
    except Exception as e:
        # Best-effort: keep returning None so the UI stays usable, but
        # surface the reason instead of silently discarding it.
        gr.Warning(f"Speech generation failed: {e}")
        return None
    return output_path
def get_available_voices():
    """Get the list of available voices from the model.

    Returns:
        The model's ``available_voices`` value when it is non-empty;
        otherwise a one-element list holding the default voice
        ``"expr-voice-5-m"``.
    """
    default_voices = ["expr-voice-5-m"]  # Default voice as fallback
    try:
        voices = model.available_voices
        return voices if voices else default_voices
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        return default_voices
# Get available voices
available_voices = get_available_voices()

# Voice used when no voices are available at all.
_FALLBACK_VOICE = "expr-voice-5-m"


def _example_voice(index):
    """Return the voice at ``index``, wrapping around on short voice lists.

    The example rows ask for fixed positions (2, 1 and 5). Indexing the list
    directly raises IndexError while the UI is being built whenever fewer
    voices are available, so the index is taken modulo the list length.
    Existing positions resolve to the same voice as direct indexing.
    """
    if not available_voices:
        return _FALLBACK_VOICE
    return available_voices[index % len(available_voices)]


# Create Gradio interface
# NOTE(review): the widget nesting below was reconstructed from a copy of the
# file that had lost its indentation - confirm the layout (in particular the
# placement of the Generate button) against the running app.
with gr.Blocks(title="KittenTTS - Text to Speech", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🐱 KittenTTS - Text to Speech Generator")
    gr.Markdown("Convert your text to high-quality speech using KittenTTS nano model!")

    with gr.Row():
        with gr.Column(scale=2):
            # Input components
            text_input = gr.Textbox(
                label="Text to Convert",
                placeholder="Enter the text you want to convert to speech...",
                lines=4,
                max_lines=10,
            )
            with gr.Row():
                voice_dropdown = gr.Dropdown(
                    choices=available_voices,
                    value=_example_voice(0),
                    label="Voice Selection",
                    info="Choose the voice for speech generation",
                )
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    step=0.01,
                    value=1.25,
                    label="Speech Speed",
                    info="Adjust the speed of speech (0.5x to 2.0x)",
                )
            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

        with gr.Column(scale=1):
            # Output components
            audio_output = gr.Audio(
                label="Generated Speech",
                type="filepath",
                interactive=False,
                autoplay=True,
            )

    # Example inputs: each row is [text, voice, speed], matching `inputs` below.
    gr.Markdown("## 📝 Example Texts")
    examples = gr.Examples(
        examples=[
            ["Hello! This is a test of the KittenTTS model.", _example_voice(2), 1.25],
            ["The quick brown fox jumps over the lazy dog.", _example_voice(1), 1.5],
            ["Welcome to the world of high-quality text-to-speech synthesis!", _example_voice(5), 1],
        ],
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
        fn=generate_speech,
        label="Click on an example to try it out",
        cache_examples="lazy",
    )

    # Model information
    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown("""
        **Model:** KittenML/kitten-tts-nano-0.1

        **Features:**
        - High-quality text-to-speech synthesis
        - Works without GPU acceleration
        - Multiple voice options
        - Adjustable speech speed
        - 24kHz audio output

        **Usage:**
        1. Enter your text in the text box
        2. Select a voice from the dropdown
        3. Adjust the speech speed if needed
        4. Click "Generate Speech" to create audio

        Generated files are saved in temporary directory with unique UUID filenames.
        """)

    # Event handlers: the button click and pressing Enter in the text box
    # both run generate_speech with the same inputs and output.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
    )
    # Auto-generate on Enter key (optional)
    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, voice_dropdown, speed_slider],
        outputs=[audio_output],
    )
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    # Enable the request queue (with a default per-event concurrency limit of
    # 100), then start the server.
    app.queue(default_concurrency_limit=100).launch()