|
import gradio as gr |
|
import os |
|
import re |
|
from whisper_tts import WhisperTTS |
|
from ollama_chatbotTTS import OllamaChat |
|
from text_to_speech import TextToSpeech |
|
from sync_audio_video import AudioVideoSync |
|
|
|
|
|
# Bootstrap the Ollama runtime as a side effect of loading this module:
# first pipe the upstream install script into `sh`, then start
# `ollama serve` as a background shell job (trailing `&`).
# The exit status returned by os.system() is ignored for both commands.
for _bootstrap_cmd in (
    "curl https://ollama.com/install.sh | sh",
    "ollama serve &",
):
    os.system(_bootstrap_cmd)
|
|
|
|
|
# Directory scanned by get_thumbnail_images() for avatar thumbnails.
THUMBNAILS_DIR: str = "thumbnails"
# Directory searched by find_matching_video() for an avatar's source video.
VIDEO_DIR: str = "sample_video"
|
|
|
def get_thumbnail_images(thumbnails_dir=None):
    """Collect the avatar thumbnails available in a directory.

    Args:
        thumbnails_dir: Directory to scan. When omitted (``None``) the
            module-level ``THUMBNAILS_DIR`` is used, which is how existing
            zero-argument callers invoke this function.

    Returns:
        A list of ``(name, path)`` tuples, one per ``.png``/``.jpg``/``.jpeg``
        entry (extension matched case-insensitively). ``name`` is the file
        name without its extension and ``path`` is the entry joined onto the
        directory. Entries are ordered by file name. An empty list is
        returned when the directory is missing or is not a directory.
    """
    if thumbnails_dir is None:
        thumbnails_dir = THUMBNAILS_DIR
    # isdir rather than exists: a regular file at this path would otherwise
    # reach os.listdir() and raise NotADirectoryError.
    if not os.path.isdir(thumbnails_dir):
        return []
    # os.listdir() returns entries in arbitrary order; sort so the listing
    # (and anything built from it) is stable between runs.
    return [
        (os.path.splitext(entry)[0], os.path.join(thumbnails_dir, entry))
        for entry in sorted(os.listdir(thumbnails_dir))
        if entry.lower().endswith((".png", ".jpg", ".jpeg"))
    ]
|
|
|
# Scan the thumbnails once when the module loads; update_avatar_display()
# reads this list on every lookup.
thumbnail_images = get_thumbnail_images()
# Avatar names (first element of each pair), in the same order as the scan.
avatar_names = [entry[0] for entry in thumbnail_images]
|
|
|
def find_matching_video(file_name, video_dir=None):
    """Find the video file whose base name matches ``file_name``.

    Args:
        file_name: Base name to look for (no extension). Compared
            case-insensitively. ``None`` or an empty string never matches.
        video_dir: Directory to search. When omitted (``None``) the
            module-level ``VIDEO_DIR`` is used, which is how existing
            single-argument callers invoke this function.

    Returns:
        The path (directory joined with the entry name) of the first entry,
        in sorted file-name order, whose stem equals ``file_name`` and whose
        extension is ``.mp4``, ``.avi`` or ``.mov`` (case-insensitive).
        ``None`` when nothing matches or the directory is unavailable.
    """
    if video_dir is None:
        video_dir = VIDEO_DIR
    # isdir rather than exists: a regular file at this path would otherwise
    # reach os.listdir() and raise NotADirectoryError.
    if not file_name or not os.path.isdir(video_dir):
        return None
    wanted = file_name.lower()
    # Sorted so that, when several extensions share a stem, the same file is
    # picked on every run (os.listdir() order is arbitrary).
    for video in sorted(os.listdir(video_dir)):
        stem, ext = os.path.splitext(video)
        if stem.lower() == wanted and ext.lower() in (".mp4", ".avi", ".mov"):
            return os.path.join(video_dir, video)
    return None
|
|
|
def update_avatar_display(selected_name):
    """Return the thumbnail path for the chosen avatar name.

    Walks the module-level ``thumbnail_images`` pairs and returns the path of
    the first one whose name equals ``selected_name`` exactly, or ``None``
    when no pair matches.
    """
    matching_paths = (
        path for label, path in thumbnail_images if label == selected_name
    )
    return next(matching_paths, None)
|
|
|
def check_enable_process_button(selected_name, audio_file, transcribed_text):
    """Decide whether the process button should be clickable.

    The button is enabled only when an avatar is selected AND there is
    something to process: either an audio file or non-blank text.

    Args:
        selected_name: Currently selected avatar name (falsy when none).
        audio_file: Current audio input value (falsy when none).
        transcribed_text: Current text value; ``None`` is treated as empty.

    Returns:
        A ``gr.update`` setting ``interactive`` to ``True`` or ``False``.
    """
    # Guard against a None text value so .strip() cannot raise here.
    has_text = bool((transcribed_text or "").strip())
    ready = bool(selected_name) and (bool(audio_file) or has_text)
    return gr.update(interactive=ready)
|
|
|
def process_pipeline(audio_file, transcribed_text, selected_name):
    """Run transcription, chat, speech synthesis and video sync in sequence.

    This is a generator. After each stage it yields a 4-tuple
    ``(transcribed_text, chatbot_response, speech_audio, video)`` so the
    caller can display partial results; slots for stages not yet reached
    hold ``""`` or ``None``.

    Args:
        audio_file: Audio to transcribe. When truthy, its transcription
            replaces ``transcribed_text``.
        transcribed_text: Text sent to the chatbot when no audio is given.
            ``None`` is treated as empty.
        selected_name: Avatar name used to look up the source video.

    Yields:
        Progress tuples as described above. The generator stops early when
        there is no usable text, the chatbot response is empty, no avatar is
        selected, or no matching video exists.
    """
    # Stage 1: speech-to-text, only when audio was supplied.
    if audio_file:
        whisper = WhisperTTS()
        transcribed_text = whisper.transcribe_audio(audio_file)
        yield transcribed_text, "", None, None

    # Treat a missing (None) text like an empty one instead of crashing on
    # .strip().
    if not (transcribed_text or "").strip():
        yield "Warning: Please provide valid text.", "", None, None
        return

    # Stage 2: chatbot reply.
    ollama = OllamaChat()
    resp = ollama.get_response(transcribed_text)
    # NOTE(review): only the <think>/</think> markers are removed; any text
    # between them stays in the response. Confirm that is intended.
    resp = re.sub(r"<think>|</think>", "", resp).strip()
    yield transcribed_text, resp, None, None

    if not resp:
        yield transcribed_text, "Warning: No chatbot response.", None, None
        return

    # Stage 3: synthesize speech for the reply.
    tts = TextToSpeech()
    audio_out = tts.synthesize(resp)
    yield transcribed_text, resp, audio_out, None

    # Stage 4: pair the speech with the avatar's video.
    # The fourth slot is a video value, so warnings must not be yielded
    # there as text; surface them through gr.Warning and keep the slot None.
    if not selected_name:
        gr.Warning("Warning: Select an avatar.")
        yield transcribed_text, resp, audio_out, None
        return

    vid_in = find_matching_video(selected_name)
    if not vid_in:
        gr.Warning("Warning: No matching video.")
        yield transcribed_text, resp, audio_out, None
        return

    sync = AudioVideoSync()
    vid_out = sync.sync_audio_video(vid_in, audio_out)
    yield transcribed_text, resp, audio_out, vid_out
|
|
|
def build_demo() -> gr.Blocks:
    """Assemble the Gradio UI and wire up its event handlers.

    Returns:
        The ``gr.Blocks`` app after ``queue(max_size=100000)`` has been
        applied to it.
    """
    with gr.Blocks() as app:
        gr.Markdown("## Personalized Avatar Video")

        with gr.Row():
            # Left column: inputs, avatar picker and the trigger button.
            with gr.Column():
                audio_input = gr.Audio(type="filepath", label="Audio Input")
                transcribed_text = gr.Textbox(label="Edit and Process Text")
                chatbot_resp = gr.Textbox(label="Assistant Response")
                gr.Markdown("### Select an Avatar")
                selected_avatar = gr.Radio(
                    choices=avatar_names,
                    label="Select an Avatar",
                )
                avatar_display = gr.Image(
                    label="Selected Avatar",
                    width=150,
                    height=150,
                )
                # Starts disabled; check_enable_process_button toggles it.
                process_btn = gr.Button(
                    "Generate Lip-Sync Video",
                    interactive=False,
                )

            # Right column: generated outputs.
            with gr.Column():
                tts_audio = gr.Audio(label="Generated Speech")
                video_out = gr.Video(label="Final Lip-Synced Video")

        # Show the thumbnail for whichever avatar is picked.
        selected_avatar.change(
            update_avatar_display,
            inputs=[selected_avatar],
            outputs=[avatar_display],
        )

        # Re-evaluate the button state whenever any gating input changes.
        gating_components = (selected_avatar, audio_input, transcribed_text)
        for component in gating_components:
            component.change(
                check_enable_process_button,
                inputs=list(gating_components),
                outputs=[process_btn],
            )

        # Run the full pipeline; its yielded tuples map onto these outputs.
        process_btn.click(
            process_pipeline,
            inputs=[audio_input, transcribed_text, selected_avatar],
            outputs=[transcribed_text, chatbot_resp, tts_audio, video_out],
        )

    app = app.queue(max_size=100000)
    return app
|
|
|
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it with these settings.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
        "inbrowser": True,
    }
    demo = build_demo()
    demo.launch(**launch_options)
|
|