# Spaces: Running on Zero
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

from struct_caption import StructCaptioner
from fusion_caption import FusionCaptioner
# Initialize the captioning models.
struct_captioner = StructCaptioner("Skywork/SkyCaptioner-V1")
fusion_captioner = FusionCaptioner("Qwen/Qwen3-8B")

# Load the Vietnamese translation model. The model and its tokenizer are
# loaded from the same checkpoint id so the two cannot drift apart.
TRANSLATION_MODEL_ID = "VietAI/envit5-translation"
translation_model = AutoModelForSeq2SeqLM.from_pretrained(TRANSLATION_MODEL_ID)
translation_tokenizer = AutoTokenizer.from_pretrained(TRANSLATION_MODEL_ID)


def translate_to_vietnamese(text):
    """Translate English text to Vietnamese with the module-level model.

    Args:
        text: English text to translate.

    Returns:
        The first generated sequence decoded with special tokens removed
        and any leading ``"vi: "`` tag stripped, or an empty string when
        ``text`` is empty, whitespace-only, or ``None``.
    """
    if not text or not text.strip():
        # Nothing to translate: skip the model call instead of decoding
        # whatever it would generate for a bare "en: " prompt.
        return ""

    # The input is tagged with its source language ("en: ").
    inputs = translation_tokenizer(f"en: {text}", return_tensors="pt", padding=True)
    outputs = translation_model.generate(**inputs, max_length=512)
    translated = translation_tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The VietAI/envit5-translation model card shows decoded outputs tagged
    # with the target language ("vi: ..."); drop that tag so callers get
    # only the translation. This is a no-op when the tag is absent.
    vi_tag = "vi: "
    if translated.startswith(vi_tag):
        translated = translated[len(vi_tag):]
    return translated


# Gradio user interface.
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center;'>SkyCaptioner-V1</h1>")

    # Row 1: video upload and trigger on the left, structured caption on
    # the right.
    with gr.Row():
        # Gradio expects integer ``scale`` values. 1 here against 2 on the
        # sibling column keeps the original 0.5 : 1 width ratio.
        with gr.Column(scale=1):
            video_input = gr.Video(label="Upload Video", interactive=True, format="mp4")
            btn_struct = gr.Button("Generate Struct Caption")
        with gr.Column(scale=2):
            struct_caption_output = gr.Code(label="Struct Caption", language="json", lines=25, interactive=False)

    # Row 2: task selection and trigger on the left, fused caption on the
    # right.
    with gr.Row():
        with gr.Column(scale=1):
            task_input = gr.Radio(label="Task Type", choices=["t2v", "i2v"], value="t2v", interactive=True)
            btn_fusion = gr.Button("Generate Fusion Caption")
        with gr.Column(scale=2):
            fusion_caption_output = gr.Textbox(label="Fusion Caption", value="", interactive=False)

    def generate_struct_caption(video):
        """Return the structured caption ``struct_captioner`` produces for ``video``."""
        return struct_captioner(video)

    def generate_fusion_caption(struct_caption_str, task):
        """Fuse a structured caption into a single caption for ``task``.

        Args:
            struct_caption_str: Structured caption text passed to
                ``fusion_captioner``.
            task: Task type selected in the UI (``"t2v"`` or ``"i2v"``).

        Returns:
            The fused caption unchanged when ``task`` is ``"t2v"``;
            otherwise the fused caption passed through
            ``translate_to_vietnamese``.
        """
        fusion_caption = fusion_captioner(struct_caption_str, task)
        if task == "t2v":
            return fusion_caption
        # NOTE(review): only non-"t2v" output is translated to Vietnamese;
        # confirm this asymmetry is intended.
        return translate_to_vietnamese(fusion_caption)

    # Connect the buttons to their handlers; without these listeners the
    # buttons render but clicking them does nothing.
    btn_struct.click(
        fn=generate_struct_caption,
        inputs=video_input,
        outputs=struct_caption_output,
    )
    btn_fusion.click(
        fn=generate_fusion_caption,
        inputs=[struct_caption_output, task_input],
        outputs=fusion_caption_output,
    )

    gr.Examples(
        examples=[["./examples/1.mp4"], ["./examples/2.mp4"], ["./examples/3.mp4"], ["./examples/4.mp4"]],
        inputs=video_input,
        label="Example Videos",
    )

demo.launch()