File size: 1,639 Bytes
d10c3f2
 
 
 
d50703f
d10c3f2
4544a40
d50703f
d10c3f2
d50703f
d10c3f2
 
 
 
 
 
 
 
 
 
d50703f
d10c3f2
 
 
 
4544a40
d10c3f2
 
4544a40
d10c3f2
 
d50703f
 
d10c3f2
 
 
 
4544a40
 
 
 
 
 
d10c3f2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
import cv2
import numpy as np
from collections import defaultdict
from transformers import pipeline

# Object-detection pipeline, built once at import time and reused for every
# frame (per the original note, loading this checkpoint needs timm installed).
detector = pipeline(
    task="object-detection",
    model="facebook/detr-resnet-101",
)

# Shared tally: detected label -> number of detections seen so far.
# Missing labels start at 0.
object_counter = defaultdict(int)

def process_video(video_path):
    """Detect objects in each frame of a video and stream the annotated frames.

    Every frame read from ``video_path`` is passed to the module-level
    ``detector``. Each detection gets a green box and label drawn on the
    frame and increments its label's entry in the module-level
    ``object_counter``. The running totals are drawn in red in the top-left
    corner, one label per line.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.
            A falsy value (``None`` or ``""``) produces no frames.

    Yields:
        The annotated frame after ``cv2.COLOR_BGR2RGB`` conversion, one per
        frame that could be read from the capture.
    """
    # The change event can fire without a video (e.g. the input was cleared);
    # there is nothing to open in that case.
    if not video_path:
        return

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.7
    thickness = 2
    box_color = (0, 255, 0)
    counter_color = (0, 0, 255)
    counter_origin_y = 30
    counter_line_height = 25

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): the pipeline is handed a raw RGB ndarray here; its
            # documentation lists paths, URLs and PIL images as inputs, so
            # confirm the installed transformers version accepts arrays.
            results = detector(rgb_frame, threshold=0.7)

            for obj in results:
                label = obj["label"]
                object_counter[label] += 1
                box = obj["box"]
                xmin, ymin = int(box["xmin"]), int(box["ymin"])
                xmax, ymax = int(box["xmax"]), int(box["ymax"])
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), box_color, thickness)
                # Keep the label baseline inside the frame when the box
                # touches the top edge.
                label_y = max(ymin - 10, 20)
                cv2.putText(frame, f"{label}", (xmin, label_y), font, font_scale, box_color, thickness)

            # cv2.putText does not interpret "\n", so every counter entry is
            # drawn as its own line. The items are snapshotted first because
            # the reset button may clear the counter while a video is streaming.
            for row, (name, count) in enumerate(tuple(object_counter.items())):
                cv2.putText(
                    frame,
                    f"{name}: {count}",
                    (10, counter_origin_y + row * counter_line_height),
                    font,
                    font_scale,
                    counter_color,
                    thickness,
                )

            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer closes
        # the generator early, so the capture handle is never leaked.
        cap.release()

# UI: an uploaded video is streamed frame by frame through process_video into
# an image component; the button clears the shared detection counter.
with gr.Blocks() as demo:
    gr.Markdown("# 🎥 Video Object Detection")
    video_input = gr.Video(label="Upload Video")
    video_output = gr.Image(label="Detections")
    reset_button = gr.Button("Reset Counter")
    video_input.change(fn=process_video, inputs=video_input, outputs=video_output)
    reset_button.click(lambda: object_counter.clear())

# process_video is a generator handler. Gradio streams generator output through
# its queue, which is not enabled by default on every release, so enable it
# explicitly (this is harmless where queuing is already the default).
demo.queue()
demo.launch()