Spaces:

nguyentantoan
/

vintern-video-recognition

Sleeping

App Files Files Community

nguyentantoan committed on May 23

Commit

cb8d367

verified ·

1 Parent(s): 63fea4e

Update app.py

Browse files

Files changed (1) hide show

app.py +234 -154

app.py CHANGED Viewed

@@ -1,154 +1,234 @@
-import gradio as gr
-import torch
-from transformers import AutoModel, AutoTokenizer
-import torchvision.transforms as T
-from torchvision.transforms.functional import InterpolationMode
-from PIL import Image
-import base64
-import io
-import time
-# Setup
-device = "cpu"  # HF Spaces miễn phí chỉ có CPU
-model = None
-tokenizer = None
-transform = None
-def load_model():
-    global model, tokenizer, transform
-    try:
-        print("🤖 Loading Vintern-1B-v3.5...")
-        model_name = "5CD-AI/Vintern-1B-v3_5"
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            trust_remote_code=True
-        )
-        model = AutoModel.from_pretrained(
-            model_name,
-            torch_dtype=torch.float32,
-            trust_remote_code=True,
-            low_cpu_mem_usage=True
-        )
-        # Image transform
-        IMAGENET_MEAN = (0.485, 0.456, 0.406)
-        IMAGENET_STD = (0.229, 0.224, 0.225)
-        transform = T.Compose([
-            T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
-            T.Resize((448, 448), interpolation=InterpolationMode.BICUBIC),
-            T.ToTensor(),
-            T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
-        ])
-        print("✅ Model loaded successfully!")
-        return True
-    except Exception as e:
-        print(f"❌ Error loading model: {e}")
-        return False
-def analyze_image(image):
-    if model is None:
-        return "❌ Model chưa được tải. Vui lòng chờ..."
-    try:
-        start_time = time.time()
-        # Preprocess image
-        if isinstance(image, str):
-            # Base64 image
-            if image.startswith('data:image'):
-                image = image.split(',')[1]
-            image_bytes = base64.b64decode(image)
-            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
-        image_tensor = transform(image).unsqueeze(0).to(device)
-        with torch.no_grad():
-            query = "Mô tả chi tiết những gì bạn thấy trong hình ảnh này:"
-            description = model.chat(
-                tokenizer,
-                image_tensor,
-                query,
-                generation_config=dict(
-                    max_new_tokens=200,
-                    do_sample=True,
-                    temperature=0.7,
-                    top_p=0.9,
-                    repetition_penalty=1.1
-                )
-            )
-            # Get objects
-            try:
-                object_query = "Liệt kê các đối tượng chính:"
-                objects_text = model.chat(
-                    tokenizer,
-                    image_tensor,
-                    object_query,
-                    generation_config=dict(max_new_tokens=100, temperature=0.5)
-                )
-                objects = [obj.strip() for obj in objects_text.replace(',', ' ').split() if len(obj.strip()) > 2][:5]
-                objects_str = ", ".join(objects) if objects else "Không có"
-            except:
-                objects_str = "Không có"
-            processing_time = time.time() - start_time
-            return f"""
-**📝 Mô tả từ Vintern AI:**
-{description}
-**🔍 Đối tượng nhận diện:**
-{objects_str}
-**⚡ Thời gian xử lý:** {processing_time:.2f}s
-**🤖 Model:** Vintern-1B-v3.5 (Hugging Face Spaces)
-"""
-    except Exception as e:
-        return f"❌ Lỗi phân tích: {str(e)}"
-# Load model khi khởi động
-print("🚀 Initializing Vintern-1B-v3.5...")
-model_loaded = load_model()
-# Gradio interface
-with gr.Blocks(title="Vintern-1B-v3.5 Video Recognition") as demo:
-    gr.Markdown("# 🎥 Vintern-1B-v3.5 - Nhận Diện Ảnh Tiếng Việt")
-    gr.Markdown("Upload ảnh để nhận diện nội dung bằng AI Vintern-1B-v3.5")
-    if not model_loaded:
-        gr.Markdown("⚠️ **Model đang được tải...** Vui lòng chờ vài phút.")
-    with gr.Row():
-        with gr.Column():
-            image_input = gr.Image(type="pil", label="📤 Upload Ảnh")
-            analyze_btn = gr.Button("🔍 Phân Tích", variant="primary")
-        with gr.Column():
-            result_output = gr.Textbox(label="📋 Kết Quả", lines=10, max_lines=15)
-    analyze_btn.click(
-        fn=analyze_image,
-        inputs=image_input,
-        outputs=result_output
-    )
-    gr.Markdown("""
-    ---
-    **💡 Hướng dẫn:**
-    1. Upload ảnh từ máy tính hoặc webcam
-    2. Nhấn "Phân Tích" để nhận diện
-    3. Xem kết quả mô tả tiếng Việt
-    **🔗 API Endpoint:** Sử dụng URL của Space này trong trangchu.html
-    """)
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

+import gradio as gr
+import torch
+from transformers import AutoModel, AutoTokenizer
+import torchvision.transforms as T
+from torchvision.transforms.functional import InterpolationMode
+from PIL import Image
+import base64
+import io
+import time
+import logging
+import warnings
+# Silence every Python warning process-wide and limit the "transformers"
+# logger to ERROR and above.
+warnings.filterwarnings("ignore")
+logging.getLogger("transformers").setLevel(logging.ERROR)
+# Module-level state; the three placeholders below are filled in by load_model().
+device = "cpu"  # the free Hugging Face Spaces tier only provides a CPU
+model = None
+tokenizer = None
+transform = None
+def load_model():
+    """Load the Vintern tokenizer, model and image transform into module globals.
+
+    Assigns the module-level `tokenizer`, `model` and `transform` in that order.
+
+    Returns:
+        True when everything loaded; False when any step raised (the error is
+        printed, and globals assigned before the failure keep their new values).
+    """
+    global model, tokenizer, transform
+    checkpoint = "5CD-AI/Vintern-1B-v3_5"
+    try:
+        print("🤖 Loading Vintern-1B-v3.5...")
+        tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
+        model = AutoModel.from_pretrained(
+            checkpoint,
+            torch_dtype=torch.float32,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True,
+        )
+        # Preprocessing pipeline: force RGB, resize to 448x448 (bicubic),
+        # convert to a tensor, then normalize with the ImageNet mean/std.
+        def ensure_rgb(img):
+            return img if img.mode == 'RGB' else img.convert('RGB')
+        preprocessing_steps = [
+            T.Lambda(ensure_rgb),
+            T.Resize((448, 448), interpolation=InterpolationMode.BICUBIC),
+            T.ToTensor(),
+            T.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
+        ]
+        transform = T.Compose(preprocessing_steps)
+        print("✅ Model loaded successfully!")
+        return True
+    except Exception as load_error:
+        print(f"❌ Error loading model: {load_error}")
+        return False
+def analyze_image(image):
+    """Describe a PIL image in Vietnamese and list its main objects.
+
+    Sends two prompts to `model.chat` for the preprocessed image: a detailed
+    description and a list of the main objects. The object prompt is
+    best-effort; if it fails, the object line falls back to "Không có".
+
+    Args:
+        image: A `PIL.Image.Image`, or None when no image was supplied.
+
+    Returns:
+        A Markdown-formatted result string, or a "❌ ..." message when the model
+        is not loaded, the input is missing or invalid, or the analysis raises.
+    """
+    if model is None:
+        return "❌ Model chưa được tải. Vui lòng chờ..."
+    if image is None:
+        return "❌ Không có ảnh để phân tích."
+    # Only PIL images are supported (the UI component is created with type="pil").
+    if not isinstance(image, Image.Image):
+        return "❌ Định dạng ảnh không hợp lệ."
+    try:
+        start_time = time.time()
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        # Add a leading batch dimension before handing the tensor to the model.
+        image_tensor = transform(image).unsqueeze(0).to(device)
+        with torch.no_grad():
+            query = "Mô tả chi tiết những gì bạn thấy trong hình ảnh này:"
+            description = model.chat(
+                tokenizer,
+                image_tensor,
+                query,
+                generation_config=dict(
+                    max_new_tokens=200,
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.9,
+                    repetition_penalty=1.1
+                )
+            )
+            # Object listing is optional: any failure keeps the default below.
+            objects_str = "Không có"
+            try:
+                object_query = "Liệt kê các đối tượng chính:"
+                # NOTE(review): temperature is passed without do_sample here, so it
+                # may have no effect on generation - confirm the intent.
+                objects_text = model.chat(
+                    tokenizer,
+                    image_tensor,
+                    object_query,
+                    generation_config=dict(max_new_tokens=100, temperature=0.5)
+                )
+                # Split on list separators (comma, semicolon, newline) rather than
+                # on whitespace: Vietnamese object names are usually several
+                # space-separated syllables ("xe máy"), which a whitespace split
+                # would tear apart and partly discard.
+                candidates = objects_text.replace('\n', ',').replace(';', ',').split(',')
+                # Drop list decoration such as "1.", "-", "•" and trailing periods.
+                cleaned = [
+                    item.strip().lstrip("0123456789.)-•* ").rstrip(" .")
+                    for item in candidates
+                ]
+                objects = [item for item in cleaned if item][:5]
+                if objects:
+                    objects_str = ", ".join(objects)
+            except Exception as obj_error:
+                print(f"Warning: Object detection failed: {obj_error}")
+        processing_time = time.time() - start_time
+        return f"""**📝 Mô tả từ Vintern AI:**
+{description}
+**🔍 Đối tượng nhận diện:**
+{objects_str}
+**⚡ Thời gian xử lý:** {processing_time:.2f}s
+**🤖 Model:** Vintern-1B-v3.5 (Hugging Face Spaces)
+**📡 API Status:** ✅ Hoạt động bình thường"""
+    except Exception as e:
+        print(f"Analysis error: {e}")
+        return f"❌ Lỗi phân tích: {str(e)}"
+# Load the model at startup; model_loaded records whether load_model() succeeded.
+print("🚀 Initializing Vintern-1B-v3.5...")
+model_loaded = load_model()
+# Custom CSS handed to gr.Blocks: caps the container width at 1200px and hides
+# the page footer.
+custom_css = """
+.gradio-container {
+    max-width: 1200px !important;
+}
+footer {
+    visibility: hidden;
+}
+"""
+# Gradio interface: an image input and an "analyze" button in the left column,
+# a result textbox in the right column, and usage notes underneath.
+with gr.Blocks(
+    title="Vintern-1B-v3.5 Video Recognition",
+    css=custom_css,
+    theme=gr.themes.Soft()
+) as demo:
+    gr.Markdown("""
+    # 🎥 Vintern-1B-v3.5 - Nhận Diện Ảnh Tiếng Việt
+    Upload ảnh để nhận diện nội dung bằng AI Vintern-1B-v3.5
+    **🔧 API Ready:** Sử dụng `/api/predict` endpoint cho ứng dụng web của bạn!
+    """)
+    # model_loaded is read once while the UI is being built, so this banner is
+    # static: it is present for the whole process lifetime or not at all.
+    if not model_loaded:
+        gr.Markdown("⚠️ **Model đang được tải...** Vui lòng chờ vài phút và refresh trang.")
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(
+                type="pil",
+                label="📤 Upload Ảnh",
+                sources=["upload", "webcam", "clipboard"]
+            )
+            analyze_btn = gr.Button(
+                "🔍 Phân Tích",
+                variant="primary",
+                size="lg"
+            )
+        with gr.Column():
+            result_output = gr.Textbox(
+                label="📋 Kết Quả",
+                lines=12,
+                max_lines=20,
+                show_copy_button=True
+            )
+    # Event handlers with error handling. safe_analyze is a last-resort guard:
+    # it turns any exception escaping analyze_image into an error string so the
+    # textbox always receives text.
+    def safe_analyze(image):
+        try:
+            return analyze_image(image)
+        except Exception as e:
+            return f"❌ Lỗi hệ thống: {str(e)}"
+    analyze_btn.click(
+        fn=safe_analyze,
+        inputs=image_input,
+        outputs=result_output,
+        show_progress=True
+    )
+    # Auto-analyze whenever the image value changes.
+    # NOTE(review): the button and the change event share one handler, so
+    # clicking the button after an upload runs the analysis a second time -
+    # confirm this is intended.
+    image_input.change(
+        fn=safe_analyze,
+        inputs=image_input,
+        outputs=result_output,
+        show_progress=True
+    )
+    # Plain (non-f) string: "{space_url}" below is rendered literally as a
+    # placeholder, not substituted.
+    gr.Markdown("""
+    ---
+    **💡 Hướng dẫn sử dụng:**
+    1. 📤 Upload ảnh từ máy tính, webcam hoặc clipboard
+    2. 🔍 Nhấn "Phân Tích" hoặc tự động phân tích khi upload
+    3. 📋 Xem kết quả mô tả tiếng Việt chi tiết
+    **🔗 API Usage:**
+    - **Endpoint:** `{space_url}/api/predict`
+    - **Method:** POST
+    - **Format:** FormData với image file
+    **⚡ Hiệu suất:** CPU-optimized cho Hugging Face Spaces miễn phí
+    """)
+# Launch the app. Request queuing is enabled through Blocks.queue() instead of
+# launch(enable_queue=...): the `enable_queue` and `show_tips` keyword arguments
+# were removed from launch() in Gradio 4, where passing them raises TypeError
+# before the server starts.
+if __name__ == "__main__":
+    demo.queue()  # handle multiple requests
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        show_error=True,
+        quiet=False,  # show logs for debugging
+        max_threads=2,  # limit worker threads for the free tier
+    )