Spaces:

Ragulravi
/

indic-parler-tts

Running

App Files Files Community

indic-parler-tts / app.py

Ragulravi

Update app.py

45ffb4a verified 23 days ago

raw

history blame contribute delete

2.38 kB

	import torch
	import soundfile as sf
	import gradio as gr
	from parler_tts import ParlerTTSForConditionalGeneration
	from transformers import AutoTokenizer

	# Voice names shown in the "Choose Voice" dropdown; the selected name is
	# interpolated into the style description that conditions the model.
	speaker_names = [
	    "Thoma", "Mary", "Swapna", "Dinesh", "Meera", "Jatin", "Aakash",
	    "Sneha", "Kabir", "Tisha", "Chingkhei", "Thoiba", "Priya", "Tarun",
	    "Gauri", "Nisha", "Raghav", "Kavya", "Ravi", "Vikas", "Riya",
	]

	# Load the TTS checkpoint once at import time and move it to the GPU when one
	# is available. Two tokenizers are needed: one for the text to be spoken and
	# one (taken from the model's text-encoder config) for the voice description.
	_MODEL_ID = "ai4bharat/indic-parler-tts"
	device = "cuda:0" if torch.cuda.is_available() else "cpu"
	model = ParlerTTSForConditionalGeneration.from_pretrained(_MODEL_ID)
	model = model.to(device)
	tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
	description_tokenizer = AutoTokenizer.from_pretrained(
	    model.config.text_encoder._name_or_path
	)

	def generate_tts(prompt, speaker):
	    """Synthesize ``prompt`` in the requested voice and return a WAV file path.

	    Args:
	        prompt: Text to be spoken. Must contain at least one non-whitespace
	            character.
	        speaker: Voice name; it is interpolated into the style description
	            that is fed to the model alongside the prompt.

	    Returns:
	        Path (str) of a newly created ``.wav`` file holding the generated
	        audio, written at ``model.config.sampling_rate``.

	    Raises:
	        gr.Error: If ``prompt`` is empty or whitespace-only.
	    """
	    import tempfile

	    # Reject empty input up front instead of running generation on nothing.
	    if not prompt or not prompt.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    description = (
	        f"{speaker} speaks in a warm, neutral Indian English accent with a moderate pitch and steady pace. "
	        "tone is friendly yet professional, making listeners feel welcome and comfortable. "
	        "The voice is clear and well-articulated, with no regional inflections, and the recording is high-quality with no background noise."
	    )

	    # The description and the prompt go through different tokenizers.
	    desc_ids = description_tokenizer(description, return_tensors="pt").to(device)
	    prompt_ids = tokenizer(prompt, return_tensors="pt").to(device)

	    # Inference only: no gradients needed.
	    with torch.no_grad():
	        generation = model.generate(
	            input_ids=desc_ids.input_ids,
	            attention_mask=desc_ids.attention_mask,
	            prompt_input_ids=prompt_ids.input_ids,
	            prompt_attention_mask=prompt_ids.attention_mask,
	        )
	    audio_arr = generation.cpu().numpy().squeeze()

	    # Use a unique file per call: a single fixed filename lets overlapping
	    # requests overwrite each other's audio before it is served. The handle
	    # is closed before writing so the path can be reopened on every platform.
	    # NOTE(review): files are left in the system temp directory; add cleanup
	    # if the host does not purge it.
	    with tempfile.NamedTemporaryFile(
	        prefix="parler_tts_", suffix=".wav", delete=False
	    ) as tmp:
	        wav_path = tmp.name
	    sf.write(wav_path, audio_arr, model.config.sampling_rate)
	    return wav_path

	def parler_gradio_interface(prompt, speaker):
	    """Gradio callback: forward the UI inputs to ``generate_tts``.

	    Returns whatever ``generate_tts`` returns (the path of the WAV file).
	    """
	    wav_path = generate_tts(prompt, speaker)
	    return wav_path

	# Gradio UI: a text box and a voice dropdown feed the callback, whose
	# returned file path is played back by the audio component.
	_text_input = gr.Textbox(label="Enter text (Indian English)", lines=2)
	_voice_input = gr.Dropdown(
	    label="Choose Voice",
	    choices=speaker_names,
	    value=speaker_names[0],
	)
	_audio_output = gr.Audio(type="filepath", label="Generated Audio")

	iface = gr.Interface(
	    fn=parler_gradio_interface,
	    inputs=[_text_input, _voice_input],
	    outputs=_audio_output,
	    title="Indic Parler-TTS Voice Generator",
	    description="Enter your text, select a voice, and click Generate to hear Indian English TTS with chosen style.",
	    allow_flagging="never",
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()