"""Flask helpers that run YOLOv8 object detection on uploaded images and videos."""

import io
import os
import random
import tempfile

import cv2
import numpy as np
from flask import send_file, request, Response
from ultralytics import YOLO

# Load model once at startup so every request reuses the same weights.
model = YOLO("yolov8n.pt")


def _class_color(cls):
    """Return a deterministic BGR color (list of 3 ints) for a class id.

    Uses a private ``random.Random`` instance instead of ``random.seed(cls)``
    so the process-wide RNG state is not clobbered on every box. The values
    match the old ``random.seed(cls); random.randint(0, 255)`` output exactly
    (same Mersenne Twister seeding).
    """
    rng = random.Random(cls)
    return [rng.randint(0, 255) for _ in range(3)]


def _draw_detections(frame, results):
    """Draw labeled bounding boxes from YOLO ``results`` onto ``frame`` in place.

    Shared by image and video paths so the annotation style stays consistent.
    """
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            cls = int(box.cls[0])
            label = model.names[cls]
            conf = float(box.conf[0])  # 0-d tensor -> plain float for formatting
            color = _class_color(cls)
            cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
            cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)


def detect_image(file):
    """Run detection on an uploaded image and return an annotated JPEG response.

    Args:
        file: a file-like object (e.g. ``werkzeug.FileStorage``) holding image bytes.

    Returns:
        A Flask ``send_file`` JPEG response with boxes drawn, or a
        ``({"error": ...}, 400)`` tuple if the bytes do not decode as an image.
    """
    img_bytes = file.read()
    # Convert bytes to an OpenCV BGR image.
    np_arr = np.frombuffer(img_bytes, np.uint8)
    image = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
    if image is None:
        return {"error": "Invalid image"}, 400

    results = model.predict(source=image, save=False, conf=0.3)
    _draw_detections(image, results)

    # Encode the annotated image back to JPEG bytes for the response.
    _, buffer = cv2.imencode('.jpg', image)
    return send_file(
        io.BytesIO(buffer.tobytes()),
        mimetype='image/jpeg',
        as_attachment=False,
        download_name='detected.jpg'
    )


def detect_video(file):
    """Run detection on an uploaded video and return the annotated MP4 bytes.

    Args:
        file: a file-like object holding video bytes.

    Returns:
        ``io.BytesIO`` with the re-encoded, annotated video, or a
        ``({"error": ...}, 400)`` tuple if OpenCV cannot open the upload.

    Temp files and capture/writer handles are released in ``finally`` so a
    failure mid-stream no longer leaks resources on disk.
    """
    # Persist the upload to disk: cv2.VideoCapture needs a real file path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_input:
        temp_input.write(file.read())
        input_path = temp_input.name

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_output:
        output_path = temp_output.name

    cap = None
    out = None
    try:
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            return {"error": "Invalid video"}, 400

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # some containers report 0 fps

        fourcc = cv2.VideoWriter_fourcc(*'avc1')  # or *'XVID'
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            results = model.predict(source=frame, save=False, conf=0.3)
            _draw_detections(frame, results)
            out.write(frame)
    finally:
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()

    try:
        with open(output_path, 'rb') as f:
            video_bytes = f.read()
    finally:
        # Always remove both temp files, even if reading the output fails.
        for path in (input_path, output_path):
            try:
                os.remove(path)
            except OSError:
                pass

    return io.BytesIO(video_bytes)