"""Gradio front-end for the ai4bharat Indic Parler-TTS model.

Loads the model and its two tokenizers once at import time, then exposes a
small web UI where the user types text, picks a named voice and receives the
synthesized audio as a WAV file.
"""

import tempfile

import gradio as gr
import soundfile as sf
import torch
from parler_tts import ParlerTTSForConditionalGeneration
from transformers import AutoTokenizer

# Voice list
speaker_names = [
    "Thoma", "Mary", "Swapna", "Dinesh", "Meera", "Jatin", "Aakash",
    "Sneha", "Kabir", "Tisha", "Chingkhei", "Thoiba", "Priya", "Tarun",
    "Gauri", "Nisha", "Raghav", "Kavya", "Ravi", "Vikas", "Riya",
]

# Load model and tokenizers
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = ParlerTTSForConditionalGeneration.from_pretrained(
    "ai4bharat/indic-parler-tts"
).to(device)
# `tokenizer` encodes the text to be spoken; `description_tokenizer` encodes
# the voice description and is loaded from the model's text-encoder checkpoint.
tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
description_tokenizer = AutoTokenizer.from_pretrained(
    model.config.text_encoder._name_or_path
)


def generate_tts(prompt, speaker):
    """Synthesize *prompt* in the voice of *speaker* and return a WAV path.

    Args:
        prompt: Text to be spoken.
        speaker: Speaker name interpolated into the voice description that
            conditions the model.

    Returns:
        Path of a newly created WAV file holding the generated audio.

    Raises:
        gr.Error: If *prompt* is empty or contains only whitespace.
    """
    # Reject empty input up front instead of running generation on nothing.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter some text to synthesize.")

    description = (
        f"{speaker} speaks in a warm, neutral Indian English accent with a moderate pitch and steady pace. "
        "tone is friendly yet professional, making listeners feel welcome and comfortable. "
        "The voice is clear and well-articulated, with no regional inflections, and the recording is high-quality with no background noise."
    )

    desc_ids = description_tokenizer(description, return_tensors="pt").to(device)
    prompt_ids = tokenizer(prompt, return_tensors="pt").to(device)

    with torch.no_grad():
        generation = model.generate(
            input_ids=desc_ids.input_ids,
            attention_mask=desc_ids.attention_mask,
            prompt_input_ids=prompt_ids.input_ids,
            prompt_attention_mask=prompt_ids.attention_mask,
        )

    audio_arr = generation.cpu().numpy().squeeze()

    # Use a unique file per request: a single fixed filename would let
    # overlapping requests overwrite each other's audio before it is served.
    # The file must outlive this call (Gradio reads it after we return), so it
    # is created with delete=False and left for the OS temp cleanup.
    with tempfile.NamedTemporaryFile(
        prefix="parler_tts_", suffix=".wav", delete=False
    ) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, audio_arr, model.config.sampling_rate)
    return wav_path


def parler_gradio_interface(prompt, speaker):
    """Gradio callback: delegate to :func:`generate_tts` unchanged."""
    return generate_tts(prompt, speaker)


# Gradio UI
iface = gr.Interface(
    fn=parler_gradio_interface,
    inputs=[
        gr.Textbox(label="Enter text (Indian English)", lines=2),
        gr.Dropdown(
            label="Choose Voice",
            choices=speaker_names,
            value=speaker_names[0],
        ),
    ],
    outputs=gr.Audio(type="filepath", label="Generated Audio"),
    title="Indic Parler-TTS Voice Generator",
    description="Enter your text, select a voice, and click Generate to hear Indian English TTS with chosen style.",
    allow_flagging="never",
)

if __name__ == "__main__":
    iface.launch()