File size: 3,181 Bytes
3c82458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import cv2
import numpy as np
import torch
from ultralytics import YOLO
from typing import List, Tuple, Optional
from dataclasses import dataclass

@dataclass
class Detection:
    """Record describing a single detected dog."""
    # Box corners, ordered [x1, y1, x2, y2]
    bbox: List[float]
    # Score the detector assigned to this box
    confidence: float
    # Cropped dog image; stays None when no crop is attached
    image_crop: Optional[np.ndarray] = None
    
class DogDetector:
    """
    Simplified YOLOv8 detector optimized for dogs
    Uses standard pretrained model - no custom training needed
    """

    def __init__(self,
                 confidence_threshold: float = 0.45,
                 device: str = 'cuda'):
        """
        Initialize detector

        Args:
            confidence_threshold: Min confidence for detections (0.45 works well).
                Stored as given; unlike set_confidence() it is not clamped here.
            device: 'cuda' for GPU, 'cpu' for CPU. When torch reports that
                CUDA is unavailable the detector runs on 'cpu' regardless of
                the value requested.
        """
        self.confidence_threshold = confidence_threshold
        self.device = device if torch.cuda.is_available() else 'cpu'

        # Load YOLOv8 medium model (good balance of speed/accuracy)
        self.model = YOLO('yolov8m.pt')
        self.model.to(self.device)

        # COCO class ID for dog
        self.dog_class_id = 16

    def detect(self, frame: np.ndarray) -> "List[Detection]":
        """
        Detect dogs in frame

        Args:
            frame: BGR image from OpenCV. ``None`` or an array with no
                elements yields an empty list without running the model.

        Returns:
            List of Detection objects with bounding boxes and crops. Each
            bbox is truncated to integers and clamped to the frame bounds;
            boxes that end up with no area are dropped.
        """
        # Nothing to detect in a missing/empty frame (e.g. a failed capture
        # read) -- return early instead of handing it to the model.
        if frame is None or frame.size == 0:
            return []

        # Run YOLO inference, restricted to the dog class
        results = self.model(frame,
                             conf=self.confidence_threshold,
                             classes=[self.dog_class_id],
                             verbose=False)
        if not results:
            return []

        boxes = results[0].boxes
        if boxes is None or len(boxes) == 0:
            return []

        # Frame size is the same for every box, so read it once.
        frame_h, frame_w = frame.shape[:2]

        # Convert all boxes/scores to NumPy in one go rather than once per box.
        corners = boxes.xyxy.cpu().numpy()
        scores = boxes.conf.cpu().numpy()

        detections = []
        for (x1, y1, x2, y2), score in zip(corners, scores):
            # Truncate to pixel indices and clamp to the frame bounds
            x1 = max(0, int(x1))
            y1 = max(0, int(y1))
            x2 = min(frame_w, int(x2))
            y2 = min(frame_h, int(y2))

            # Skip boxes with no area after clamping
            if x2 <= x1 or y2 <= y1:
                continue

            detections.append(Detection(
                bbox=[x1, y1, x2, y2],
                confidence=float(score),
                # Copy so the crop does not alias the caller's frame buffer
                image_crop=frame[y1:y2, x1:x2].copy(),
            ))

        return detections

    def set_confidence(self, threshold: float) -> None:
        """
        Update detection confidence threshold

        Args:
            threshold: Requested minimum confidence; clamped to [0.1, 1.0]
                before being stored.
        """
        self.confidence_threshold = max(0.1, min(1.0, threshold))