|
|
|
import gradio as gr |
|
import torch |
|
from transformers import AutoModel, AutoTokenizer |
|
import torchvision.transforms as T |
|
from torchvision.transforms.functional import InterpolationMode |
|
from PIL import Image |
|
import time |
|
import json |
|
import traceback |
|
|
|
|
|
# Device every input tensor is moved to before inference (see fast_analyze).
device = "cpu"

# Shared inference state. These start out empty and are filled in by
# load_model(); fast_analyze() reads them on every request.
model = None
tokenizer = None
transform = None
|
|
|
def _to_rgb(img):
    """Return *img* converted to RGB when it exposes a non-RGB ``mode``.

    Objects without a ``mode`` attribute are passed through untouched.
    """
    if hasattr(img, "mode") and img.mode != "RGB":
        return img.convert("RGB")
    return img


def build_transform(input_size=448):
    """Build the image preprocessing pipeline used before inference.

    The pipeline converts the image to RGB, resizes it to a square of
    ``input_size`` x ``input_size`` pixels with bicubic interpolation,
    converts it to a tensor and normalizes it with the ImageNet mean/std.

    Args:
        input_size: Side length, in pixels, of the square output image.

    Returns:
        A ``torchvision.transforms.Compose`` applying the steps above.
    """
    imagenet_mean = (0.485, 0.456, 0.406)
    imagenet_std = (0.229, 0.224, 0.225)

    return T.Compose([
        # A named function instead of an inline lambda keeps the pipeline
        # readable and introspectable.
        T.Lambda(_to_rgb),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=imagenet_mean, std=imagenet_std),
    ])
|
|
|
def load_model():
    """Load the Vintern-1B-v2 model, its tokenizer and the image transform.

    On success the module-level ``model``, ``tokenizer`` and ``transform``
    globals are populated together. On failure the error and its traceback
    are printed and the globals are left untouched, so callers never observe
    a half-initialized state.

    Returns:
        bool: ``True`` when everything loaded, ``False`` otherwise.
    """
    global model, tokenizer, transform

    model_name = "5CD-AI/Vintern-1B-v2"

    try:
        print("🚀 Loading Vintern-1B (Fast Version)...")

        # NOTE: trust_remote_code=True executes Python code shipped with the
        # model repository; only use it with repositories you trust.
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
        )

        loaded_model = AutoModel.from_pretrained(
            model_name,
            torch_dtype=torch.float32,
            trust_remote_code=True,
            low_cpu_mem_usage=True,
        )
        loaded_model.eval()

        # torch.jit.optimize_for_inference() is intentionally NOT applied:
        # it only accepts a ScriptModule and raises for a plain nn.Module,
        # which previously made every load report failure after the model
        # had already been assigned. fast_analyze() also needs the eager
        # model's `chat` method.

        loaded_transform = build_transform()
    except Exception as e:
        print(f"❌ Error: {e}")
        traceback.print_exc()
        return False

    # Publish all three objects at once, only after every step succeeded.
    model = loaded_model
    tokenizer = loaded_tokenizer
    transform = loaded_transform

    print("✅ Fast model loaded!")
    return True
|
|
|
def fast_analyze(image):
    """Describe *image* with the loaded model and return a text report.

    Args:
        image: The uploaded picture (the UI passes a PIL image), or ``None``
            when the input was cleared.

    Returns:
        str: A report containing the generated description, the elapsed
        time and the derived throughput, or a short ``❌ ...`` message when
        the model is not ready, no image was given, or inference failed.
    """
    # All three globals are used below; checking only `model` would let a
    # partially failed load crash on `transform(image)`.
    if model is None or tokenizer is None or transform is None:
        return "❌ Model chưa sẵn sàng"

    if image is None:
        return "❌ Không có ảnh"

    try:
        # perf_counter is monotonic, so the interval cannot go negative.
        start_time = time.perf_counter()

        if hasattr(image, "mode") and image.mode != "RGB":
            image = image.convert("RGB")

        # Add a leading batch dimension before handing the tensor to the model.
        image_tensor = transform(image).unsqueeze(0).to(device)

        query = "Mô tả ngắn gọn:"

        with torch.no_grad():
            try:
                result = model.chat(
                    tokenizer,
                    image_tensor,
                    query,
                    # Greedy decoding. `temperature` is deliberately omitted:
                    # it only applies when do_sample=True.
                    generation_config=dict(
                        max_new_tokens=100,
                        do_sample=False,
                        num_beams=1,
                    ),
                )
            except Exception:
                # Best-effort fallback, but never silently: log why the
                # primary path failed.
                traceback.print_exc()

                # NOTE: this path feeds only the text prompt to the model,
                # so the output is not conditioned on the image.
                inputs = tokenizer(query, return_tensors="pt").to(device)
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=80,
                    do_sample=False,
                    num_beams=1,
                )
                result = tokenizer.decode(outputs[0], skip_special_tokens=True)
                result = result.replace(query, "").strip()

        processing_time = time.perf_counter() - start_time
        # Guard the division so a zero-length interval cannot raise.
        fps = 1 / processing_time if processing_time > 0 else float("inf")

        return f"""**📝 Mô tả nhanh:**
{result}

**⚡ Thời gian:** {processing_time:.1f}s
**🤖 Model:** Vintern-1B-v2 (Optimized)
**💨 Tốc độ:** {fps:.1f} FPS

---
*Model được tối ưu cho tốc độ - phù hợp real-time*
"""

    except Exception as e:
        # Keep the UI responsive, but leave a traceback in the server log.
        traceback.print_exc()
        return f"❌ Lỗi: {e}"
|
|
|
|
|
# Load the model once at import time; the UI below reads `model_loaded`
# to decide which status banner to show.
print("🚀 Starting Fast Vintern Server...")
model_loaded = load_model()
|
|
|
|
|
# Gradio UI: an image upload on the left, the generated report on the right.
with gr.Blocks(
    title="Vintern-1B Fast",
    theme=gr.themes.Base(),
) as demo:

    gr.Markdown("# ⚡ Vintern-1B Fast - Tốc Độ Cao")

    # Status banner. Without the else-branch a failed load left the page
    # with no indication that anything was wrong.
    if model_loaded:
        gr.Markdown("✅ **Model sẵn sàng!** Tối ưu cho tốc độ và real-time.")
    else:
        gr.Markdown("❌ **Model chưa sẵn sàng.** Kiểm tra log máy chủ để biết chi tiết.")

    # NOTE(review): the original indentation was lost, so it is unclear
    # whether the controls below were nested under `if model_loaded:`.
    # They are rendered unconditionally here because fast_analyze() reports
    # the not-ready state itself — confirm against the original layout.
    with gr.Row():
        image_input = gr.Image(type="pil", label="📤 Upload Ảnh")
        result_output = gr.Textbox(
            label="📋 Kết Quả",
            lines=8,
            show_copy_button=True,
        )

    # Run the analysis automatically whenever the image input changes.
    image_input.change(
        fn=fast_analyze,
        inputs=image_input,
        outputs=result_output,
    )

    gr.Markdown("""
    ### ⚡ Tối ưu cho tốc độ:
    - **Model nhẹ**: Vintern-1B-v2 (~1.5GB)
    - **Fast generation**: Greedy decode, short output
    - **Optimized**: JIT compilation, no beam search
    - **Real-time ready**: ~2-5 giây/ảnh
    """)
|
|
|
if __name__ == "__main__":
    # Listen on every network interface, on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)