Spaces:
Running
Running
import torch | |
import soundfile as sf | |
import gradio as gr | |
from parler_tts import ParlerTTSForConditionalGeneration | |
from transformers import AutoTokenizer | |
# Voice names offered in the UI dropdown. The selected name is interpolated
# into the voice description that conditions the model.
speaker_names = [
    "Thoma",
    "Mary",
    "Swapna",
    "Dinesh",
    "Meera",
    "Jatin",
    "Aakash",
    "Sneha",
    "Kabir",
    "Tisha",
    "Chingkhei",
    "Thoiba",
    "Priya",
    "Tarun",
    "Gauri",
    "Nisha",
    "Raghav",
    "Kavya",
    "Ravi",
    "Vikas",
    "Riya",
]
# Load the model and both tokenizers once, at import time, so every request
# reuses them. Prefer the first CUDA device when one is available.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

_checkpoint = "ai4bharat/indic-parler-tts"

model = ParlerTTSForConditionalGeneration.from_pretrained(_checkpoint)
model = model.to(device)

# The text to be spoken is tokenized with the checkpoint's own tokenizer.
tokenizer = AutoTokenizer.from_pretrained(_checkpoint)

# The voice description is tokenized with the tokenizer found at the path
# recorded in the model's text-encoder config.
description_tokenizer = AutoTokenizer.from_pretrained(
    model.config.text_encoder._name_or_path
)
def generate_tts(prompt: str, speaker: str) -> str:
    """Synthesize speech for ``prompt`` in the chosen voice and save it as WAV.

    A fixed style description, prefixed with ``speaker``, is tokenized with
    ``description_tokenizer`` and passed to the model as the conditioning
    input; ``prompt`` is tokenized with ``tokenizer`` and passed as the text
    to be spoken.

    Args:
        prompt: Text to be spoken.
        speaker: Speaker name placed at the start of the voice description.

    Returns:
        Path to a newly created WAV file containing the generated audio,
        written at ``model.config.sampling_rate``.

    Raises:
        gr.Error: If ``prompt`` is empty or contains only whitespace.
    """
    import os
    import tempfile

    # Reject blank input up front instead of asking the model to generate
    # audio for no text; gr.Error surfaces the message in the UI.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter some text to synthesize.")

    description = (
        f"{speaker} speaks in a warm, neutral Indian English accent with a moderate pitch and steady pace. "
        "tone is friendly yet professional, making listeners feel welcome and comfortable. "
        "The voice is clear and well-articulated, with no regional inflections, and the recording is high-quality with no background noise."
    )
    desc_ids = description_tokenizer(description, return_tensors="pt").to(device)
    prompt_ids = tokenizer(prompt, return_tensors="pt").to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        generation = model.generate(
            input_ids=desc_ids.input_ids,
            attention_mask=desc_ids.attention_mask,
            prompt_input_ids=prompt_ids.input_ids,
            prompt_attention_mask=prompt_ids.attention_mask,
        )
    audio_arr = generation.cpu().numpy().squeeze()

    # Write to a unique file per call. A single shared filename would let
    # overlapping requests overwrite each other's audio before it is served.
    fd, wav_path = tempfile.mkstemp(prefix="parler_tts_", suffix=".wav")
    os.close(fd)  # soundfile reopens the file by path
    sf.write(wav_path, audio_arr, model.config.sampling_rate)
    return wav_path
def parler_gradio_interface(prompt, speaker):
    """Gradio callback: forward the form inputs to ``generate_tts``.

    Args:
        prompt: Text entered in the textbox.
        speaker: Voice name chosen in the dropdown.

    Returns:
        Whatever ``generate_tts`` returns for the given text and voice.
    """
    result = generate_tts(prompt, speaker)
    return result
# Gradio UI: a text box and a voice dropdown as inputs, one audio player as
# output. Components are built first, then wired into the Interface.
_text_input = gr.Textbox(label="Enter text (Indian English)", lines=2)
_voice_input = gr.Dropdown(
    label="Choose Voice",
    choices=speaker_names,
    value=speaker_names[0],  # default to the first voice in the list
)
_audio_output = gr.Audio(type="filepath", label="Generated Audio")

iface = gr.Interface(
    fn=parler_gradio_interface,
    inputs=[_text_input, _voice_input],
    outputs=_audio_output,
    title="Indic Parler-TTS Voice Generator",
    description="Enter your text, select a voice, and click Generate to hear Indian English TTS with chosen style.",
    allow_flagging="never",
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()