from flask import send_file, request, Response
from ultralytics import YOLO
import cv2
import numpy as np
import random
import io
import tempfile
import os

# Load model once at startup
model = YOLO("yolov8n.pt")

def detect_image(file):
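    """Run YOLOv8 detection on an uploaded image and return the annotated JPEG as a Flask response."""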
    img_bytes = file.read()

    # Convert bytes to OpenCV image
    np_arr = np.frombuffer(img_bytes, np.uint8)
    image = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
    if image is None:
        return {"error": "Invalid image"}, 400
    
    # Run detection
    results = model.predict(source=image, save=False, conf=0.3)
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cls = int(box.cls[0])
            label = model.names[cls]
            conf = box.conf[0]

            # Same color for same class
            random.seed(cls)
            color = [random.randint(0, 255) for _ in range(3)]

            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
            cv2.putText(image, f"{label} {conf:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
            
    # Encode the annotated image back to JPEG bytes
    _, buffer = cv2.imencode('.jpg', image)
    return send_file(
        io.BytesIO(buffer.tobytes()),
        mimetype='image/jpeg',
        as_attachment=False,
        download_name='detected.jpg'
    )

def detect_video(file):
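    """Run YOLOv8 detection on an uploaded video and return the annotated MP4 as an in-memory buffer."""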
    # Save uploaded file to a temp file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_input:
        temp_input.write(file.read())
        input_path = temp_input.name

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        os.remove(input_path)
        return {"error": "Invalid video"}, 400

    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # fall back to 25 fps if the source reports 0

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_output:
        output_path = temp_output.name

    # 'avc1' (H.264) is not available in every OpenCV build; 'mp4v' is a common fallback for .mp4 output
    fourcc = cv2.VideoWriter_fourcc(*'avc1')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        results = model.predict(source=frame, save=False, conf=0.3)
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                cls = int(box.cls[0])
                conf = box.conf[0]
                label = model.names[cls]

                random.seed(cls)
                color = [random.randint(0, 255) for _ in range(3)]
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

        out.write(frame)

    cap.release()
    out.release()

    with open(output_path, 'rb') as f:
        video_bytes = f.read()

    os.remove(input_path)
    os.remove(output_path)

    return io.BytesIO(video_bytes)
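
# --- Usage sketch (illustration only) ---
# The helpers above return Flask-compatible objects, so they can be wired into
# routes roughly as below. The route paths, the "file" form field name, the
# endpoint function names, and the app object are assumptions for illustration;
# adapt them to the surrounding project.
if __name__ == "__main__":
    from flask import Flask

    app = Flask(__name__)

    @app.route("/detect/image", methods=["POST"])
    def image_endpoint():
        # detect_image already returns a send_file response or an (error, status) tuple
        return detect_image(request.files["file"])

    @app.route("/detect/video", methods=["POST"])
    def video_endpoint():
        result = detect_video(request.files["file"])
        if isinstance(result, tuple):  # error case: (dict, status_code)
            return result
        # success case: an in-memory MP4 buffer
        return Response(result.getvalue(), mimetype="video/mp4")

    app.run(debug=True)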