"""Gradio demo: run DETR object detection on an uploaded video, frame by frame."""

from collections import defaultdict
from typing import Iterator, Optional

import cv2
import gradio as gr
import numpy as np
from transformers import pipeline

# Initialize the model (now works with timm installed)
detector = pipeline("object-detection", model="facebook/detr-resnet-101")

# Global counter: number of detections per label, accumulated over every
# processed frame (and across uploads) until the reset button clears it.
object_counter = defaultdict(int)

DETECTION_THRESHOLD = 0.7
FONT = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 2
BOX_COLOR = (0, 255, 0)
COUNTER_COLOR = (0, 0, 255)
COUNTER_ORIGIN = (10, 30)
# Vertical distance in pixels between consecutive counter lines.
COUNTER_LINE_HEIGHT = 25
# Smallest baseline y at which a box label is still drawn inside the image.
MIN_LABEL_Y = 20


def _draw_counter(frame, counts):
    """Draw one ``label: count`` line per entry of *counts* onto *frame*.

    ``cv2.putText`` does not interpret newline characters, so every entry is
    rendered with its own call, stacked downwards from ``COUNTER_ORIGIN``.
    """
    x, y0 = COUNTER_ORIGIN
    for row, (label, count) in enumerate(counts.items()):
        cv2.putText(
            frame,
            f"{label}: {count}",
            (x, y0 + row * COUNTER_LINE_HEIGHT),
            FONT,
            FONT_SCALE,
            COUNTER_COLOR,
            THICKNESS,
        )


def _draw_detection(frame, obj):
    """Draw the bounding box and label of a single detection onto *frame*."""
    box = obj["box"]
    xmin, ymin = int(box["xmin"]), int(box["ymin"])
    xmax, ymax = int(box["xmax"]), int(box["ymax"])
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), BOX_COLOR, THICKNESS)
    # Clamp the baseline so labels of boxes touching the top edge stay visible.
    label_y = max(ymin - 10, MIN_LABEL_Y)
    cv2.putText(
        frame, f"{obj['label']}", (xmin, label_y),
        FONT, FONT_SCALE, BOX_COLOR, THICKNESS,
    )


def process_video(video_path: Optional[str]) -> Iterator[np.ndarray]:
    """Yield annotated RGB frames for the video at *video_path*.

    Each frame is passed to the module-level ``detector``; every detection
    increments ``object_counter`` for its label and is drawn on the frame,
    followed by an overlay of the running per-label counts.

    Args:
        video_path: Path of the video to read. ``None`` or an empty string
            (e.g. when the input component is cleared) yields nothing.

    Yields:
        The annotated frame converted from OpenCV's BGR order to RGB.
    """
    if not video_path:
        return

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): the object-detection pipeline documents str / PIL
            # image inputs; confirm the installed transformers version accepts
            # a raw ndarray, otherwise wrap it with PIL.Image.fromarray.
            results = detector(rgb_frame, threshold=DETECTION_THRESHOLD)

            for obj in results:
                object_counter[obj["label"]] += 1
                _draw_detection(frame, obj)

            _draw_counter(frame, object_counter)
            yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    finally:
        # Runs on normal exhaustion, on errors, and when the consumer closes
        # the generator early, so the capture handle is always released.
        cap.release()


def _reset_counter() -> None:
    """Clear all accumulated detection counts."""
    object_counter.clear()


with gr.Blocks() as demo:
    gr.Markdown("# 🎥 Video Object Detection")
    video_input = gr.Video(label="Upload Video")
    video_output = gr.Image(label="Detections")
    reset_button = gr.Button("Reset Counter")

    video_input.change(process_video, video_input, video_output)
    reset_button.click(_reset_counter)

if __name__ == "__main__":
    demo.launch()