"""Gradio demo app: runs ObjectDetector on an uploaded video and returns an
annotated copy with object, pose, and scene/cognitive analysis overlays."""

import gradio as gr
import cv2
import numpy as np
from object_detection import ObjectDetector
import tempfile
import os
import torch
import time

# Initialize the detector
try:
    detector = ObjectDetector()
    print("Detector initialized successfully")
except Exception as e:
    print(f"Error initializing detector: {e}")
    detector = None


def process_video(video_path):
    if detector is None:
        return None, "Error: Detector initialization failed"

    try:
        start_time = time.time()

        # Use a persistent temporary directory so the output file still exists
        # when Gradio serves it (a TemporaryDirectory context manager would
        # delete the file as soon as this function returns).
        temp_dir = tempfile.mkdtemp()

        # Open the video
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            return None, "Error: Could not open video file"

        # Get video properties
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Limit processing to the first 100 frames for demo purposes
        max_frames = min(100, total_frames)

        # Create output video writer
        output_path = os.path.join(temp_dir, "output.mp4")
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        frame_count = 0
        processed_frames = 0

        while cap.isOpened() and frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1

            # Run detection on every 5th frame to speed up processing;
            # other frames are written through unmodified.
            if frame_count % 5 != 0:
                out.write(frame)
                continue

            # Process frame
            try:
                results = detector.detect(frame)

                # Draw detections
                for det in results['detections']:
                    x1, y1, x2, y2 = det['bbox']
                    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)),
                                  (0, 255, 0), 2)
                    cv2.putText(frame, f"{det['class']} {det['confidence']:.2f}",
                                (int(x1), int(y1) - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                # Draw pose detections
                for pose in results['pose_detections']:
                    keypoints = pose['keypoints']
                    for kp in keypoints:
                        x, y, conf = kp
                        if conf > 0.5:
                            cv2.circle(frame, (int(x), int(y)), 4, (0, 0, 255), -1)

                # Draw analysis box
                y_offset = 30
                cv2.putText(frame, f"Total Objects: {results['stats']['total_objects']}",
                            (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
                y_offset += 30

                # Draw scene context
                cv2.putText(frame, "Scene Context:", (10, y_offset),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
                y_offset += 30
                cv2.putText(frame,
                            f"Scene Type: {results['analysis']['scene_context']['scene_type']}",
                            (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
                y_offset += 30

                # Draw cognitive analysis
                cv2.putText(frame, "Cognitive Analysis:", (10, y_offset),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)
                y_offset += 30
                cv2.putText(frame,
                            f"Group Activity: {results['analysis']['cognitive']['group_activity']}",
                            (10, y_offset), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)

                # Write the annotated frame
                out.write(frame)
                processed_frames += 1
            except Exception as e:
                print(f"Error processing frame {frame_count}: {e}")

        # Release resources
        cap.release()
        out.release()

        # Calculate processing time
        processing_time = time.time() - start_time

        # Return the processed video with a detailed status message
        status = (f"Processing complete!\n"
                  f"Processed {processed_frames} frames in {processing_time:.2f} seconds")
        return output_path, status
    except Exception as e:
        return None, f"Error processing video: {e}"


# Create Gradio interface
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(),
    outputs=[
        gr.Video(label="Processed Video"),
        gr.Textbox(label="Status")
    ],
    title="Glad8tr Video Analysis",
    description=(
        "Upload a video to analyze objects, poses, and cognitive states. "
        "Note: Processing is limited to the first 100 frames for demo purposes."
    ),
    examples=[
        ["teensonstreet.mp4"]
    ],
    allow_flagging="never"
)

# Launch the interface
if __name__ == "__main__":
    iface.launch(share=True)