# gradio_ui.py — Gradio front-end for the personalized avatar video app.
# Provenance: Hugging Face Space file, last change "Update gradio_ui.py"
# (commit 5baf180, verified) by jnjj.
import gradio as gr
import os
import re
from whisper_tts import WhisperTTS
from ollama_chatbotTTS import OllamaChat
from text_to_speech import TextToSpeech
from sync_audio_video import AudioVideoSync
# Install Ollama and launch its server in the background at import time.
# NOTE(review): piping a remote install script into `sh` and fire-and-forget
# `os.system` calls are fragile and run on every import — presumably this is
# intended for a trusted, containerized environment (e.g. a HF Space); confirm.
os.system("curl https://ollama.com/install.sh | sh")
os.system("ollama serve &")
# Asset directories: avatar thumbnail images and their matching source videos.
THUMBNAILS_DIR = "thumbnails"
VIDEO_DIR = "sample_video"
def get_thumbnail_images(directory=None):
    """Collect avatar thumbnails as (name, path) pairs.

    Args:
        directory: Folder to scan; defaults to the module-level
            THUMBNAILS_DIR when None (backward-compatible extension).

    Returns:
        list[tuple[str, str]]: (base name without extension, full path)
        for every .png/.jpg/.jpeg file found; [] when the folder does
        not exist (or is not a directory).
    """
    if directory is None:
        directory = THUMBNAILS_DIR
    # isdir (not exists) so a stray plain file with this name yields []
    # instead of crashing os.listdir.
    if not os.path.isdir(directory):
        return []
    return [
        (os.path.splitext(fname)[0], os.path.join(directory, fname))
        for fname in os.listdir(directory)
        if fname.lower().endswith((".png", ".jpg", ".jpeg"))
    ]
# Scan the thumbnails folder once at import time; the avatar radio choices
# shown in the UI are the image base names.
thumbnail_images = get_thumbnail_images()
avatar_names = [name for name, _ in thumbnail_images]
def find_matching_video(file_name, video_dir=None):
    """Find the avatar video whose base name matches *file_name*.

    Args:
        file_name: Avatar name to look for (matched case-insensitively
            against video file stems).
        video_dir: Folder to scan; defaults to the module-level
            VIDEO_DIR when None (backward-compatible extension).

    Returns:
        str | None: Path of the first .mp4/.avi/.mov file whose stem
        equals *file_name*, or None when nothing matches or the folder
        is missing.
    """
    if video_dir is None:
        video_dir = VIDEO_DIR
    if not os.path.isdir(video_dir):
        return None
    target = file_name.lower()  # hoisted: compare once per entry
    for video in os.listdir(video_dir):
        stem, ext = os.path.splitext(video)
        if stem.lower() == target and ext.lower() in (".mp4", ".avi", ".mov"):
            return os.path.join(video_dir, video)
    return None
def update_avatar_display(selected_name, images=None):
    """Return the thumbnail path for the selected avatar name.

    Args:
        selected_name: Avatar name chosen in the radio component.
        images: Optional iterable of (name, path) pairs; defaults to the
            module-level thumbnail_images (backward-compatible extension).

    Returns:
        str | None: The matching image path, or None when no entry has
        that name.
    """
    if images is None:
        images = thumbnail_images
    return next((path for name, path in images if name == selected_name), None)
def check_enable_process_button(selected_name, audio_file, transcribed_text):
    """Gate the process button: enabled only when an avatar is selected
    AND there is some input (an audio file or non-blank text)."""
    has_input = bool(audio_file) or bool(transcribed_text.strip())
    return gr.update(interactive=bool(selected_name) and has_input)
def process_pipeline(audio_file, transcribed_text, selected_name):
    """Run the full avatar pipeline as a streaming generator.

    Yields 4-tuples of (transcribed_text, chatbot_response, tts_audio,
    video_output) after each stage so the UI can update incrementally.
    Warning strings are emitted in place of the missing component when a
    stage cannot proceed.
    """
    # Stage 1 — speech-to-text, only when an audio file was supplied.
    if audio_file:
        transcribed_text = WhisperTTS().transcribe_audio(audio_file)
        yield transcribed_text, "", None, None

    # Stage 2 — refuse to continue without usable text.
    if not transcribed_text.strip():
        yield "Warning: Please provide valid text.", "", None, None
        return

    # Stage 3 — chatbot reply; drop the reasoning-tag markers.
    # NOTE(review): this removes only the <think>/</think> tags, leaving
    # any text between them in the reply — confirm that is intended.
    reply = OllamaChat().get_response(transcribed_text)
    reply = re.sub(r"<think>|</think>", "", reply).strip()
    yield transcribed_text, reply, None, None
    if not reply:
        yield transcribed_text, "Warning: No chatbot response.", None, None
        return

    # Stage 4 — synthesize speech from the reply.
    speech_path = TextToSpeech().synthesize(reply)
    yield transcribed_text, reply, speech_path, None

    # Stage 5 — lip-sync the avatar's source video with the speech.
    if not selected_name:
        yield transcribed_text, reply, speech_path, "Warning: Select an avatar."
        return
    source_video = find_matching_video(selected_name)
    if not source_video:
        yield transcribed_text, reply, speech_path, "Warning: No matching video."
        return
    synced = AudioVideoSync().sync_audio_video(source_video, speech_path)
    yield transcribed_text, reply, speech_path, synced
def build_demo() -> gr.Blocks:
    """Assemble the Gradio UI and wire its events.

    Returns:
        gr.Blocks: The queued application, ready to launch.
    """
    with gr.Blocks() as app:
        gr.Markdown("## Personalized Avatar Video")
        with gr.Row():
            with gr.Column():
                mic_in = gr.Audio(type="filepath", label="Audio Input")
                text_box = gr.Textbox(label="Edit and Process Text")
                reply_box = gr.Textbox(label="Assistant Response")
                gr.Markdown("### Select an Avatar")
                avatar_choice = gr.Radio(choices=avatar_names, label="Select an Avatar")
                avatar_img = gr.Image(label="Selected Avatar", width=150, height=150)
                generate_btn = gr.Button("Generate Lip-Sync Video", interactive=False)
            with gr.Column():
                speech_out = gr.Audio(label="Generated Speech")
                video_player = gr.Video(label="Final Lip-Synced Video")

        # Event wiring: preview the chosen avatar, gate the button on any
        # input change, and stream the pipeline on click.
        avatar_choice.change(
            update_avatar_display, inputs=[avatar_choice], outputs=[avatar_img]
        )
        gate_inputs = [avatar_choice, mic_in, text_box]
        for component in gate_inputs:
            component.change(
                check_enable_process_button,
                inputs=gate_inputs,
                outputs=[generate_btn],
            )
        generate_btn.click(
            process_pipeline,
            inputs=[mic_in, text_box, avatar_choice],
            outputs=[text_box, reply_box, speech_out, video_player],
        )
    # Queue requests so the generator pipeline can stream partial results.
    return app.queue(max_size=100000)
if __name__ == "__main__":
    demo = build_demo()
    # 0.0.0.0 exposes the server on all interfaces (needed in containers);
    # share=True additionally opens a public Gradio tunnel URL.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        inbrowser=True,
    )