import cv2
import torch
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained MoBiLSTM model for violence detection
violence_model = load_model('MoBiLSTM_model.h5')  # Replace with the path to your model

# Load the medium YOLOv5 model for crowd detection
yolo_model = torch.hub.load('ultralytics/yolov5', 'yolov5m')

# Constants for frame extraction
IMAGE_HEIGHT, IMAGE_WIDTH = 64, 64  # Adjust to match your model's input size
SEQUENCE_LENGTH = 16  # Number of frames in each input sequence

# Extract SEQUENCE_LENGTH evenly spaced frames from the video
def extract_frames(video_path):
    frames_list = []
    video_reader = cv2.VideoCapture(video_path)
    total_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    skip_frames_window = max(int(total_frames / SEQUENCE_LENGTH), 1)

    for frame_counter in range(SEQUENCE_LENGTH):
        # Seek to the next evenly spaced frame
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
        ret, frame = video_reader.read()
        if not ret:
            break
        # cv2.resize expects (width, height)
        resized_frame = cv2.resize(frame, (IMAGE_WIDTH, IMAGE_HEIGHT))
        normalized_frame = resized_frame / 255.0  # Normalize to [0, 1]
        frames_list.append(normalized_frame)

    video_reader.release()

    # If the video yielded fewer frames, pad the sequence with zero frames
    if len(frames_list) < SEQUENCE_LENGTH:
        frames_list.extend([np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH, 3))] * (SEQUENCE_LENGTH - len(frames_list)))

    return np.array(frames_list)

# Predict whether the video contains violence
def predict_video_class(video_path):
    frames = extract_frames(video_path)
    # Add batch dimension: (1, SEQUENCE_LENGTH, IMAGE_HEIGHT, IMAGE_WIDTH, 3)
    frames = np.expand_dims(frames, axis=0)

    # Predict the class (0 = Non-Violence, 1 = Violence)
    prediction = violence_model.predict(frames)
    class_index = np.argmax(prediction, axis=1)[0]
    return class_index

# Estimate crowd size using YOLOv5
def detect_crowd(video_path):
    total_person_count = 0
    frame_counter = 0
    processed_frames = 0
    video_reader = cv2.VideoCapture(video_path)

    while True:
        ret, frame = video_reader.read()
        if not ret:
            break
        frame_counter += 1

        # Skip 2 frames (process every 3rd frame)
        if frame_counter % 3 == 0:
            processed_frames += 1
            # YOLOv5 hub models expect RGB; OpenCV decodes frames as BGR
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = yolo_model(rgb_frame)
            # Detections for the single image in the batch, as a DataFrame
            df = results.pandas().xywh[0]
            # Keep only "person" detections and count them
            person_count = (df['name'] == 'person').sum()
            total_person_count += person_count

    video_reader.release()

    # Average number of people per processed frame
    average_crowd_count = total_person_count / processed_frames if processed_frames > 0 else 0
    # Round up to the next integer
    rounded_crowd_count = int(np.ceil(average_crowd_count))

    # Classify crowd size based on the average number of people detected
    if rounded_crowd_count > 10:
        crowd_class = "Large Crowd"
    elif rounded_crowd_count > 3:
        crowd_class = "Small Crowd"
    else:
        crowd_class = "No Crowd"

    return crowd_class, rounded_crowd_count

# Analyze both violence and crowd size
def analyze_video(video_path):
    # Violence prediction (0 = Non-Violence, 1 = Violence)
    violence_class = predict_video_class(video_path)
    violence_status = "Non-Violence" if violence_class == 0 else "Violence"

    # Crowd detection
    crowd_status, crowd_count = detect_crowd(video_path)
    return violence_status, crowd_status, crowd_count

# Example usage
video_path = r'C:\Users\Asus\Downloads\Project\2\1107342075-preview.mp4'  # Replace with the path to your test video
violence_status, crowd_status, crowd_count = analyze_video(video_path)
print(f"Violence Status: {violence_status}")
print(f"Crowd Status: {crowd_status}")
print(f"Crowd Count (rounded): {crowd_count}")
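
# --- Optional batch-analysis sketch (an addition, not part of the original script) ---
# A minimal example of running analyze_video over every video in a folder; the
# folder path and the .mp4-only filter below are assumptions, so adjust them
# to your own setup.
import os

def analyze_folder(folder_path):
    for filename in sorted(os.listdir(folder_path)):
        if filename.lower().endswith('.mp4'):
            path = os.path.join(folder_path, filename)
            violence, crowd, count = analyze_video(path)
            print(f"{filename}: {violence}, {crowd} ({count} people on average)")

# Example call (hypothetical directory):
# analyze_folder(r'C:\Users\Asus\Downloads\Project\videos')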