Spaces:
Sleeping
Sleeping
Update reid.py
Browse files
reid.py
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
import cv2
|
| 3 |
import torch
|
|
@@ -5,92 +9,53 @@ import torch.nn as nn
|
|
| 5 |
import torchvision.models as models
|
| 6 |
import torchvision.transforms as transforms
|
| 7 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 8 |
-
from typing import Dict, List, Optional, Tuple
|
| 9 |
import time
|
| 10 |
from dataclasses import dataclass
|
| 11 |
-
from collections import deque
|
| 12 |
import warnings
|
| 13 |
warnings.filterwarnings('ignore')
|
| 14 |
|
| 15 |
@dataclass
|
| 16 |
class DogFeatures:
|
| 17 |
-
"""
|
| 18 |
-
|
| 19 |
-
color_histogram: np.ndarray
|
| 20 |
-
body_keypoints: Optional[np.ndarray] = None
|
| 21 |
-
size_features: Optional[np.ndarray] = None # width, height, aspect ratio
|
| 22 |
confidence: float = 0.5
|
| 23 |
quality_score: float = 0.5
|
| 24 |
frame_num: int = 0
|
| 25 |
-
bbox: List[float] = None
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
"""Detailed matching result"""
|
| 30 |
-
dog_id: int
|
| 31 |
-
confidence: float
|
| 32 |
-
match_type: str # 'strong', 'temporal', 'weak', 'new'
|
| 33 |
-
feature_scores: Dict[str, float]
|
| 34 |
-
|
| 35 |
-
class RobustReID:
|
| 36 |
-
"""
|
| 37 |
-
Multi-feature ReID system combining:
|
| 38 |
-
1. Deep CNN features (ResNet50 + EfficientNet)
|
| 39 |
-
2. Color histograms (HSV + Lab color spaces)
|
| 40 |
-
3. Body proportions and size consistency
|
| 41 |
-
4. Temporal consistency tracking
|
| 42 |
-
5. Adaptive thresholds based on context
|
| 43 |
-
"""
|
| 44 |
|
| 45 |
def __init__(self, device: str = 'cuda'):
|
| 46 |
self.device = device if torch.cuda.is_available() else 'cpu'
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
'normal_match': 0.55, # Standard match
|
| 52 |
-
'temporal_match': 0.45, # Recently seen dog
|
| 53 |
-
'weak_match': 0.40, # Minimum for match
|
| 54 |
-
'new_dog': 0.65, # Threshold to create new dog
|
| 55 |
-
'color_weight': 0.25, # Weight for color matching
|
| 56 |
-
'size_weight': 0.15, # Weight for size consistency
|
| 57 |
-
'deep_weight': 0.60 # Weight for deep features
|
| 58 |
-
}
|
| 59 |
|
| 60 |
-
#
|
| 61 |
-
self.dog_database = {} # dog_id -> list of
|
| 62 |
-
self.dog_metadata = {} # dog_id -> metadata dict
|
| 63 |
self.next_dog_id = 1
|
| 64 |
|
| 65 |
-
#
|
| 66 |
self.track_to_dog = {} # track_id -> dog_id
|
| 67 |
-
self.
|
| 68 |
-
self.dog_last_seen = {} # dog_id ->
|
| 69 |
self.current_frame = 0
|
| 70 |
|
| 71 |
-
#
|
| 72 |
-
self.
|
| 73 |
|
| 74 |
-
# Initialize models
|
| 75 |
-
self._initialize_models()
|
| 76 |
-
|
| 77 |
-
def _initialize_models(self):
|
| 78 |
-
"""Initialize multiple feature extractors"""
|
| 79 |
try:
|
| 80 |
-
#
|
| 81 |
-
self.
|
| 82 |
-
self.
|
| 83 |
-
self.
|
| 84 |
-
|
| 85 |
-
# Secondary model: MobileNetV2 (lighter, different features)
|
| 86 |
-
self.mobilenet = models.mobilenet_v2(weights='IMAGENET1K_V1')
|
| 87 |
-
self.mobilenet.features.to(self.device).eval()
|
| 88 |
|
| 89 |
-
#
|
| 90 |
self.transform = transforms.Compose([
|
| 91 |
transforms.ToPILImage(),
|
| 92 |
-
transforms.Resize((
|
| 93 |
-
transforms.CenterCrop(224),
|
| 94 |
transforms.ToTensor(),
|
| 95 |
transforms.Normalize(
|
| 96 |
mean=[0.485, 0.456, 0.406],
|
|
@@ -98,447 +63,258 @@ class RobustReID:
|
|
| 98 |
)
|
| 99 |
])
|
| 100 |
|
| 101 |
-
|
| 102 |
-
self.
|
| 103 |
-
|
| 104 |
-
transforms.Resize((256, 256)),
|
| 105 |
-
transforms.RandomCrop(224),
|
| 106 |
-
transforms.ColorJitter(brightness=0.1, contrast=0.1),
|
| 107 |
-
transforms.ToTensor(),
|
| 108 |
-
transforms.Normalize(
|
| 109 |
-
mean=[0.485, 0.456, 0.406],
|
| 110 |
-
std=[0.229, 0.224, 0.225]
|
| 111 |
-
)
|
| 112 |
-
])
|
| 113 |
|
| 114 |
-
print("Robust ReID models initialized successfully")
|
| 115 |
except Exception as e:
|
| 116 |
-
print(f"
|
| 117 |
-
self.
|
| 118 |
-
self.mobilenet = None
|
| 119 |
|
| 120 |
-
def
|
| 121 |
-
"""Extract
|
| 122 |
-
if self.
|
| 123 |
return None
|
| 124 |
|
| 125 |
-
# Quality check
|
| 126 |
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
| 127 |
laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
|
| 128 |
-
if laplacian_var <
|
|
|
|
|
|
|
| 129 |
return None
|
| 130 |
|
| 131 |
try:
|
|
|
|
| 132 |
img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
| 133 |
|
| 134 |
-
#
|
| 135 |
-
|
| 136 |
-
img_tensor = transform(img_rgb).unsqueeze(0).to(self.device)
|
| 137 |
|
|
|
|
| 138 |
with torch.no_grad():
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
resnet_feat = resnet_feat.squeeze().cpu().numpy()
|
| 142 |
-
|
| 143 |
-
# MobileNet features
|
| 144 |
-
mobile_feat = self.mobilenet.features(img_tensor)
|
| 145 |
-
mobile_feat = torch.nn.functional.adaptive_avg_pool2d(mobile_feat, 1)
|
| 146 |
-
mobile_feat = mobile_feat.squeeze().cpu().numpy()
|
| 147 |
-
|
| 148 |
-
# Combine features (weighted concatenation)
|
| 149 |
-
combined = np.concatenate([
|
| 150 |
-
resnet_feat * 0.7, # ResNet weighted more
|
| 151 |
-
mobile_feat * 0.3 # MobileNet complementary
|
| 152 |
-
])
|
| 153 |
-
|
| 154 |
-
# L2 normalize
|
| 155 |
-
combined = combined / (np.linalg.norm(combined) + 1e-7)
|
| 156 |
-
|
| 157 |
-
return combined
|
| 158 |
-
except Exception as e:
|
| 159 |
-
print(f"Feature extraction error: {e}")
|
| 160 |
-
return None
|
| 161 |
-
|
| 162 |
-
def extract_color_histogram(self, image: np.ndarray, bbox: List[float] = None) -> np.ndarray:
|
| 163 |
-
"""Extract robust color histogram features"""
|
| 164 |
-
# Focus on center region if bbox provided
|
| 165 |
-
if bbox:
|
| 166 |
-
h, w = image.shape[:2]
|
| 167 |
-
# Get center 60% of bounding box to avoid background
|
| 168 |
-
cx = (bbox[0] + bbox[2]) / 2
|
| 169 |
-
cy = (bbox[1] + bbox[3]) / 2
|
| 170 |
-
width = (bbox[2] - bbox[0]) * 0.6
|
| 171 |
-
height = (bbox[3] - bbox[1]) * 0.6
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
x2 = min(w, int(cx + width/2))
|
| 176 |
-
y2 = min(h, int(cy + height/2))
|
| 177 |
|
| 178 |
-
|
| 179 |
-
roi = image[y1:y2, x1:x2]
|
| 180 |
-
else:
|
| 181 |
-
roi = image
|
| 182 |
-
else:
|
| 183 |
-
roi = image
|
| 184 |
-
|
| 185 |
-
# Multi-color space histograms
|
| 186 |
-
histograms = []
|
| 187 |
-
|
| 188 |
-
# HSV histogram (good for color consistency)
|
| 189 |
-
hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
|
| 190 |
-
hist_h = cv2.calcHist([hsv], [0], None, [30], [0, 180])
|
| 191 |
-
hist_s = cv2.calcHist([hsv], [1], None, [16], [0, 256])
|
| 192 |
-
hist_v = cv2.calcHist([hsv], [2], None, [16], [0, 256])
|
| 193 |
-
|
| 194 |
-
# LAB histogram (perceptually uniform)
|
| 195 |
-
lab = cv2.cvtColor(roi, cv2.COLOR_BGR2LAB)
|
| 196 |
-
hist_l = cv2.calcHist([lab], [0], None, [16], [0, 256])
|
| 197 |
-
hist_a = cv2.calcHist([lab], [1], None, [16], [0, 256])
|
| 198 |
-
hist_b = cv2.calcHist([lab], [2], None, [16], [0, 256])
|
| 199 |
-
|
| 200 |
-
# Combine all histograms
|
| 201 |
-
for hist in [hist_h, hist_s, hist_v, hist_l, hist_a, hist_b]:
|
| 202 |
-
hist = hist.flatten()
|
| 203 |
-
hist = hist / (hist.sum() + 1e-7) # Normalize
|
| 204 |
-
histograms.append(hist)
|
| 205 |
-
|
| 206 |
-
combined_hist = np.concatenate(histograms)
|
| 207 |
-
return combined_hist
|
| 208 |
-
|
| 209 |
-
def extract_size_features(self, bbox: List[float], max_dim: float = 1000.0) -> np.ndarray:
    """Return four scale-normalised size descriptors for a bounding box.

    Args:
        bbox: Box corners as ``[x1, y1, x2, y2]``; width is taken as
            ``bbox[2] - bbox[0]`` and height as ``bbox[3] - bbox[1]``.
        max_dim: Pixel length that maps to 1.0 for the width, height and
            sqrt(area) entries. Defaults to 1000, the value that used to be
            hard-coded.

    Returns:
        ``[width / max_dim, height / max_dim, aspect_ratio / 3,
        sqrt(area) / max_dim]`` clipped to ``[0, 1]``.
    """
    # An inverted axis would make the area negative; np.sqrt then yields NaN,
    # which np.clip lets through. Collapse such extents to zero instead.
    width = max(0.0, float(bbox[2] - bbox[0]))
    height = max(0.0, float(bbox[3] - bbox[1]))

    # The epsilon keeps zero-height boxes from dividing by zero.
    aspect_ratio = width / (height + 1e-7)
    area = width * height

    features = np.array([
        width / max_dim,
        height / max_dim,
        aspect_ratio / 3,          # ratios above 3 saturate at 1.0 after clipping
        np.sqrt(area) / max_dim,   # sqrt keeps area on the same linear scale as width/height
    ])

    return np.clip(features, 0, 1)
|
| 225 |
-
|
| 226 |
-
def calculate_feature_similarity(self, features1: "DogFeatures", features2: "DogFeatures") -> Dict[str, float]:
    """Score how alike two feature records are, one score per feature type.

    Args:
        features1: First record; read attributes are ``resnet_features``,
            ``color_histogram``, ``size_features`` and ``quality_score``.
        features2: Second record, same attributes.

    Returns:
        Dict with keys ``'deep'`` (cosine similarity), ``'color'`` (histogram
        intersection normalised to ``[0, 1]``), ``'size'`` (1 minus the mean
        absolute difference) and ``'quality'`` (mean of both quality scores).
        A feature type missing on either side scores a neutral 0.5.
    """
    scores = {}

    # Deep feature similarity: cosine of the two embedding vectors.
    if features1.resnet_features is not None and features2.resnet_features is not None:
        vec_a = np.asarray(features1.resnet_features, dtype=np.float64).reshape(-1)
        vec_b = np.asarray(features2.resnet_features, dtype=np.float64).reshape(-1)
        norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
        # A zero vector has no direction; treat it as "no similarity".
        scores['deep'] = float(vec_a @ vec_b / norm_product) if norm_product > 0 else 0.0
    else:
        scores['deep'] = 0.5

    # Colour similarity: histogram intersection.
    if features1.color_histogram is not None and features2.color_histogram is not None:
        hist_a = np.asarray(features1.color_histogram, dtype=np.float64)
        hist_b = np.asarray(features2.color_histogram, dtype=np.float64)
        overlap = np.minimum(hist_a, hist_b).sum()
        # The stored histogram is several unit-sum histograms concatenated, so
        # the raw intersection can exceed 1. Dividing by the smaller total
        # mass keeps the score in [0, 1]; a single unit-sum histogram is
        # unaffected.
        mass = min(hist_a.sum(), hist_b.sum())
        scores['color'] = float(overlap / mass) if mass > 0 else 0.0
    else:
        scores['color'] = 0.5

    # Size consistency: identical size vectors score 1.0.
    if features1.size_features is not None and features2.size_features is not None:
        size_diff = np.abs(features1.size_features - features2.size_features)
        scores['size'] = float(1.0 - np.mean(size_diff))
    else:
        scores['size'] = 0.5

    # Quality factor: average quality of the two samples being compared.
    scores['quality'] = (features1.quality_score + features2.quality_score) / 2

    return scores
|
| 259 |
-
|
| 260 |
-
def calculate_weighted_similarity(self, scores: Dict[str, float], context: str = 'normal') -> float:
    """Blend per-feature scores into one similarity value.

    Args:
        scores: Per-feature scores keyed ``'deep'``, ``'color'``, ``'size'``
            and optionally ``'quality'``. A missing feature counts as 0.5 and
            a missing quality as 1.0.
        context: ``'temporal'`` and ``'first_sight'`` use fixed weight sets;
            any other value uses the weights stored in ``self.thresholds``.

    Returns:
        The weighted average scaled by ``0.8 + 0.2 * quality`` and capped at 1.0.
    """
    if context == 'temporal':
        weights = {'deep': 0.5, 'color': 0.3, 'size': 0.2}
    elif context == 'first_sight':
        weights = {'deep': 0.8, 'color': 0.15, 'size': 0.05}
    else:  # normal
        weights = {
            'deep': self.thresholds['deep_weight'],
            'color': self.thresholds['color_weight'],
            'size': self.thresholds['size_weight'],
        }

    total_weight = sum(weights.values())
    if total_weight <= 0:
        # Configured weights that cancel out would divide by zero below;
        # fall back to weighting every feature equally.
        weights = dict.fromkeys(weights, 1.0)
        total_weight = float(len(weights))

    weighted_sum = sum(scores.get(name, 0.5) * weight for name, weight in weights.items())

    # Quality only modulates the last 20% of the score, so a low-quality
    # sample is penalised but never zeroed out.
    quality = scores.get('quality', 1.0)
    final_score = (weighted_sum / total_weight) * (0.8 + 0.2 * quality)

    return min(1.0, final_score)
|
| 289 |
-
|
| 290 |
-
def verify_match(self, dog_id: int, new_features: DogFeatures, threshold: float) -> Tuple[bool, float]:
|
| 291 |
-
"""Verify a potential match with multiple samples"""
|
| 292 |
-
if dog_id not in self.dog_database or not self.dog_database[dog_id]:
|
| 293 |
-
return False, 0.0
|
| 294 |
-
|
| 295 |
-
# Get recent high-quality features for comparison
|
| 296 |
-
dog_features = self.dog_database[dog_id]
|
| 297 |
-
comparison_features = sorted(
|
| 298 |
-
dog_features[-10:],
|
| 299 |
-
key=lambda x: x.quality_score * x.confidence,
|
| 300 |
-
reverse=True
|
| 301 |
-
)[:5]
|
| 302 |
-
|
| 303 |
-
similarities = []
|
| 304 |
-
for feat in comparison_features:
|
| 305 |
-
scores = self.calculate_feature_similarity(new_features, feat)
|
| 306 |
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
if similarities:
|
| 313 |
-
median_sim = np.median(similarities)
|
| 314 |
-
max_sim = np.max(similarities)
|
| 315 |
-
final_score = 0.7 * median_sim + 0.3 * max_sim
|
| 316 |
-
else:
|
| 317 |
-
final_score = 0.0
|
| 318 |
-
|
| 319 |
-
return final_score >= threshold, final_score
|
| 320 |
|
| 321 |
-
def match_or_register(self, track) ->
|
| 322 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 323 |
self.current_frame += 1
|
| 324 |
-
|
| 325 |
-
# *** NEW: assign a unique track_id if not already set ***
|
| 326 |
-
if not hasattr(track, 'track_id'):
|
| 327 |
-
if not hasattr(self, '_anon_id'): # internal counter
|
| 328 |
-
self._anon_id = 1
|
| 329 |
-
track.track_id = self._anon_id
|
| 330 |
-
self._anon_id += 1
|
| 331 |
|
| 332 |
# Get best quality detection from recent frames
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
for det in track.detections[-5:]:
|
| 337 |
if det.image_crop is not None:
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
if quality > best_quality:
|
| 341 |
-
best_quality = quality
|
| 342 |
-
best_detection = det
|
| 343 |
|
| 344 |
-
if
|
| 345 |
-
return
|
| 346 |
|
| 347 |
-
if track
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
most_common = max(set(dog_ids), key=dog_ids.count)
|
| 351 |
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
self.
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
)
|
| 378 |
-
|
| 379 |
-
candidates = []
|
| 380 |
-
|
| 381 |
-
for dog_id in self.dog_database.keys():
|
| 382 |
-
if self.dog_database[dog_id]:
|
| 383 |
-
last_color = self.dog_database[dog_id][-1].color_histogram
|
| 384 |
-
if last_color is not None:
|
| 385 |
-
color_sim = np.minimum(color_hist, last_color).sum()
|
| 386 |
-
if color_sim < 0.3:
|
| 387 |
-
continue
|
| 388 |
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
-
|
| 403 |
-
match_type = 'strong'
|
| 404 |
-
threshold = self.thresholds['strong_match']
|
| 405 |
-
elif best_score >= self.thresholds['normal_match']:
|
| 406 |
-
match_type = 'normal'
|
| 407 |
-
threshold = self.thresholds['normal_match']
|
| 408 |
-
else:
|
| 409 |
-
match_type = 'weak'
|
| 410 |
-
threshold = self.thresholds['weak_match']
|
| 411 |
|
| 412 |
-
if
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
|
| 418 |
-
|
| 419 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
self.track_to_dog[track.track_id] = best_dog_id
|
| 421 |
self.dog_last_seen[best_dog_id] = self.current_frame
|
| 422 |
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
|
| 427 |
-
return MatchResult(best_dog_id, best_score, match_type, {'threshold': threshold})
|
| 428 |
-
|
| 429 |
-
new_dog_id = self._register_new_dog(new_features)
|
| 430 |
-
return MatchResult(new_dog_id, 1.0, 'new', {})
|
| 431 |
-
|
| 432 |
-
def _register_new_dog(self, features: "DogFeatures") -> int:
    """Create a database entry for a previously unseen dog.

    Args:
        features: First feature record for the dog; its ``quality_score``
            seeds the ``'avg_quality'`` metadata.

    Returns:
        The newly allocated dog ID.
    """
    # IDs are handed out sequentially and never reused.
    new_dog_id = self.next_dog_id
    self.next_dog_id += 1

    self.dog_database[new_dog_id] = [features]
    self.dog_metadata[new_dog_id] = {
        'first_seen': self.current_frame,
        'total_sightings': 1,
        'avg_quality': features.quality_score,
    }
    self.dog_last_seen[new_dog_id] = self.current_frame

    return new_dog_id
|
| 445 |
-
|
| 446 |
-
def _update_dog_features(self, dog_id: int, detection):
    """Append a fresh feature record for an already-known dog.

    Does nothing when ``dog_id`` is not in the database or when deep feature
    extraction returns ``None`` for the detection's crop.

    Args:
        dog_id: ID of the dog to update.
        detection: Object providing ``image_crop``, ``bbox`` and ``confidence``.
    """
    if dog_id not in self.dog_database:
        return

    deep_features = self.extract_deep_features(detection.image_crop, augment=True)
    if deep_features is None:
        return

    # NOTE(review): the histogram is taken from the crop but restricted by
    # detection.bbox; if bbox is in full-frame coordinates the region will not
    # line up with the crop - confirm against the caller.
    color_hist = self.extract_color_histogram(detection.image_crop, detection.bbox)
    size_features = self.extract_size_features(detection.bbox)

    new_features = DogFeatures(
        resnet_features=deep_features,
        color_histogram=color_hist,
        size_features=size_features,
        confidence=detection.confidence,
        quality_score=0.5,
        frame_num=self.current_frame,
        bbox=detection.bbox,
    )

    self.dog_database[dog_id].append(new_features)
    # Keep the per-dog history bounded after every append.
    self._prune_features(dog_id)
    self.dog_last_seen[dog_id] = self.current_frame
|
| 470 |
-
|
| 471 |
-
def _prune_features(self, dog_id: int):
|
| 472 |
-
if dog_id not in self.dog_database:
|
| 473 |
-
return
|
| 474 |
-
|
| 475 |
-
features = self.dog_database[dog_id]
|
| 476 |
-
if len(features) <= 10:
|
| 477 |
-
return
|
| 478 |
-
|
| 479 |
-
recent = features[-5:]
|
| 480 |
-
older = features[:-5]
|
| 481 |
-
high_quality = sorted(older, key=lambda x: x.quality_score * x.confidence, reverse=True)[:5]
|
| 482 |
-
|
| 483 |
-
if len(older) > 10:
|
| 484 |
-
step = len(older) // 5
|
| 485 |
-
spaced = older[::step][:5]
|
| 486 |
else:
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
if
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
|
| 496 |
def match_or_register_all(self, track) -> Dict:
|
| 497 |
-
|
|
|
|
|
|
|
| 498 |
return {
|
| 499 |
'ResNet50': {
|
| 500 |
-
'dog_id':
|
| 501 |
-
'confidence':
|
| 502 |
-
'processing_time': 0
|
| 503 |
-
'match_type': result.match_type
|
| 504 |
}
|
| 505 |
}
|
| 506 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
def reset_all(self):
|
|
|
|
| 508 |
self.dog_database.clear()
|
| 509 |
-
self.dog_metadata.clear()
|
| 510 |
self.track_to_dog.clear()
|
| 511 |
-
self.
|
| 512 |
self.dog_last_seen.clear()
|
| 513 |
-
self.verification_cache.clear()
|
| 514 |
self.next_dog_id = 1
|
| 515 |
self.current_frame = 0
|
| 516 |
-
|
| 517 |
-
def set_all_thresholds(self, base_threshold: float):
    """Derive every matching threshold from one base value.

    The base is clamped to ``[0.3, 0.9]`` before any offset is applied, so
    the derived thresholds keep their ordering (weak <= temporal <= normal
    <= new_dog <= strong) even for out-of-range input.

    Args:
        base_threshold: Desired ``'normal_match'`` threshold.
    """
    # Clamp once up front. Clamping only 'normal_match' let an out-of-range
    # base produce e.g. a weak threshold above the normal one.
    base = max(0.3, min(0.9, base_threshold))

    self.thresholds['normal_match'] = base
    self.thresholds['strong_match'] = min(0.9, base + 0.2)
    self.thresholds['weak_match'] = max(0.3, base - 0.15)
    self.thresholds['temporal_match'] = max(0.3, base - 0.1)
    self.thresholds['new_dog'] = min(0.9, base + 0.1)
|
| 523 |
|
| 524 |
def get_statistics(self) -> Dict:
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 529 |
|
| 530 |
-
|
| 531 |
'ResNet50': {
|
| 532 |
'total_dogs': self.next_dog_id - 1,
|
| 533 |
'dogs_in_database': len(self.dog_database),
|
| 534 |
'active_dogs': active_dogs,
|
| 535 |
'avg_features_per_dog': avg_features,
|
| 536 |
-
'
|
| 537 |
'current_frame': self.current_frame
|
| 538 |
}
|
| 539 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
|
| 541 |
-
#
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
DualModelReID = RobustReID
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
reid.py - Fixed Single-Model Dog Re-Identification System
|
| 3 |
+
Properly respects thresholds to prevent merging different dogs
|
| 4 |
+
"""
|
| 5 |
import numpy as np
|
| 6 |
import cv2
|
| 7 |
import torch
|
|
|
|
| 9 |
import torchvision.models as models
|
| 10 |
import torchvision.transforms as transforms
|
| 11 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 12 |
+
from typing import Dict, List, Optional, Tuple
|
| 13 |
import time
|
| 14 |
from dataclasses import dataclass
|
|
|
|
| 15 |
import warnings
|
| 16 |
warnings.filterwarnings('ignore')
|
| 17 |
|
| 18 |
@dataclass
class DogFeatures:
    """Container for one stored appearance sample of a dog."""

    # Embedding vector for this sample.
    features: np.ndarray
    # Confidence recorded alongside the sample.
    confidence: float = 0.5
    # Image quality of the sample.
    quality_score: float = 0.5
    # Value of the frame counter when the sample was stored.
    frame_num: int = 0

    def __eq__(self, other):
        """Compare field by field, using array equality for ``features``.

        The dataclass-generated ``__eq__`` compares field tuples, which raises
        ``ValueError`` as soon as two different multi-element arrays meet
        (``bool(arr1 == arr2)`` is ambiguous). Comparing the array explicitly
        keeps ``==`` and ``in`` usable on these records.
        """
        if other.__class__ is not self.__class__:
            return NotImplemented
        same_vector = self.features is other.features or np.array_equal(self.features, other.features)
        return bool(
            same_vector
            and self.confidence == other.confidence
            and self.quality_score == other.quality_score
            and self.frame_num == other.frame_num
        )
|
|
|
|
| 25 |
|
| 26 |
+
class SingleModelReID:
    """Re-identify dogs across tracks with ResNet50 embeddings.

    Every known dog keeps its 10 most recent embeddings in an in-memory
    gallery. A detection is compared with each gallery by cosine similarity
    and is either matched to the best-scoring dog (score at or above
    ``similarity_threshold``) or registered as a new dog.
    """

    def __init__(self, device: str = 'cuda'):
        """Set up empty state and try to load the ResNet50 backbone.

        Args:
            device: Requested torch device; used only when CUDA is available,
                otherwise ``'cpu'`` is used.

        If the model cannot be initialised the error is printed and
        ``self.model`` is left as ``None``, which makes feature extraction
        return ``None``.
        """
        self.device = device if torch.cuda.is_available() else 'cpu'

        # Minimum gallery similarity needed to reuse an existing dog ID.
        self.similarity_threshold = 0.65

        # In-memory dog database
        self.dog_database = {}  # dog_id -> list of DogFeatures
        self.next_dog_id = 1

        # Track to dog mapping with confidence tracking
        self.track_to_dog = {}  # track_id -> dog_id
        self.track_confidence_history = {}  # track_id -> list of confidence scores
        self.dog_last_seen = {}  # dog_id -> value of current_frame at last sighting
        self.current_frame = 0

        # Debug mode prints every matching decision.
        self.debug = True

        try:
            # ResNet50 without its final layer, used as a feature extractor.
            self.model = models.resnet50(weights='IMAGENET1K_V1')
            self.model = nn.Sequential(*list(self.model.children())[:-1])
            self.model.to(self.device).eval()

            # Image preprocessing pipeline
            self.transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(
                    mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]
                )
            ])

            print("✅ ReID initialized successfully")
            print(f" Device: {self.device}")
            print(f" Default threshold: {self.similarity_threshold}")

        except Exception as e:
            print(f"❌ ReID initialization error: {e}")
            self.model = None
            self.transform = None

    def extract_features(self, image: np.ndarray) -> Optional[np.ndarray]:
        """Extract an L2-normalised ResNet50 embedding from a BGR image.

        Returns:
            The embedding, or ``None`` when the model is unavailable, the
            image is missing/empty or too blurry, or extraction fails.
        """
        extracted = self._extract_with_quality(image)
        return None if extracted is None else extracted[0]

    def _extract_with_quality(self, image):
        """Return ``(embedding, laplacian_variance)`` for an image, or ``None``."""
        if self.model is None or image is None or image.size == 0:
            return None

        try:
            # Quality check - skip very blurry images. It lives inside the try
            # so an unexpected image layout yields None instead of raising.
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            laplacian_var = float(cv2.Laplacian(gray, cv2.CV_64F).var())
            if laplacian_var < 50:  # Too blurry
                if self.debug:
                    print(f" Skipping blurry image (variance: {laplacian_var:.1f})")
                return None

            # Convert BGR to RGB
            img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Transform and create a batch of one
            img_tensor = self.transform(img_rgb).unsqueeze(0).to(self.device)

            with torch.no_grad():
                features = self.model(img_tensor)
            features = features.squeeze().cpu().numpy()

            # L2 normalize for cosine similarity
            features = features / (np.linalg.norm(features) + 1e-7)

            return features, laplacian_var

        except Exception as e:
            if self.debug:
                print(f" Feature extraction error: {e}")
            return None

    @staticmethod
    def _cosine_similarities(query, gallery):
        """Cosine similarity of ``query`` against each vector in ``gallery``.

        Zero-length vectors score 0.0 rather than dividing by zero.
        """
        q = np.asarray(query, dtype=np.float64).reshape(-1)
        g = np.stack([np.asarray(vec, dtype=np.float64).reshape(-1) for vec in gallery])
        denom = np.linalg.norm(g, axis=1) * np.linalg.norm(q)
        sims = np.zeros(len(g))
        usable = denom > 0
        sims[usable] = (g[usable] @ q) / denom[usable]
        return sims

    @staticmethod
    def _latest_detection(track):
        """Return the newest of the last 3 detections that has an image crop."""
        for det in reversed(track.detections[-3:]):
            if det.image_crop is not None:
                return det
        return None

    def _consistent_assignment(self, track_id):
        """Return ``(dog_id, confidence)`` for a track with a stable ID, else ``None``.

        A track counts as stable once it has at least 3 recorded confidences
        whose last three average above 0.5.
        """
        if track_id not in self.track_to_dog:
            return None
        recent_conf = self.track_confidence_history.get(track_id)
        if recent_conf is None or len(recent_conf) < 3:
            return None
        mean_conf = float(np.mean(recent_conf[-3:]))
        if not mean_conf > 0.5:
            return None

        existing_dog_id = self.track_to_dog[track_id]
        if self.debug and self.current_frame % 30 == 0:  # Print occasionally
            print(f"Frame {self.current_frame}: Track {track_id} -> Dog {existing_dog_id} (consistent)")

        self.dog_last_seen[existing_dog_id] = self.current_frame
        return existing_dog_id, mean_conf

    def _best_match(self, features):
        """Score every known dog; return ``(best_dog_id, best_score, all_scores)``."""
        best_dog_id = None
        best_score = -1.0
        all_scores = {}  # For debugging

        for dog_id, feature_list in self.dog_database.items():
            if not feature_list:
                continue

            # Average similarity against the 5 most recent samples.
            recent = [sample.features for sample in feature_list[-5:]]
            avg_similarity = float(np.mean(self._cosine_similarities(features, recent)))

            # Bonus for recently seen dogs. current_frame advances once per
            # match_or_register call, so this window counts calls.
            if dog_id in self.dog_last_seen:
                frames_since = self.current_frame - self.dog_last_seen[dog_id]
                if frames_since < 30:
                    avg_similarity += 0.05 * (1 - frames_since / 30)

            all_scores[dog_id] = avg_similarity

            if avg_similarity > best_score:
                best_score = avg_similarity
                best_dog_id = dog_id

        return best_dog_id, best_score, all_scores

    def match_or_register(self, track) -> Tuple[int, float]:
        """Assign a dog ID to a track, creating a new dog when nothing matches.

        Args:
            track: Object with ``track_id`` and ``detections``; each detection
                provides ``image_crop`` and ``confidence``.

        Returns:
            ``(dog_id, confidence)``. ``(0, 0.0)`` means no usable crop was
            available or feature extraction failed.
        """
        self.current_frame += 1

        # Get the most recent usable detection
        detection = self._latest_detection(track)
        if detection is None:
            return 0, 0.0

        # A track that has been consistent keeps its ID without re-matching.
        locked = self._consistent_assignment(track.track_id)
        if locked is not None:
            return locked

        # Extract features for new/uncertain detection
        extracted = self._extract_with_quality(detection.image_crop)
        if extracted is None:
            return 0, 0.0
        features, sharpness = extracted
        # Sharpness-based quality, capped at 1.0. The previous lookup of
        # 'laplacian_var' in locals() could never succeed in this method, so
        # quality was always the 0.5 default.
        quality = min(1.0, sharpness / 100)

        best_dog_id, best_score, all_scores = self._best_match(features)

        if self.debug:
            print(f"\nFrame {self.current_frame}, Track {track.track_id}:")
            print(f" Best match: Dog {best_dog_id} with score {best_score:.3f}")
            print(f" Threshold: {self.similarity_threshold:.3f}")
            if len(all_scores) > 1:
                print(f" All scores: {all_scores}")

        sample = DogFeatures(
            features=features,
            confidence=detection.confidence,
            quality_score=quality,
            frame_num=self.current_frame
        )

        # Decision: match or create new dog
        if best_dog_id is not None and best_score >= self.similarity_threshold:
            if self.debug:
                print(f" ✅ MATCHED to Dog {best_dog_id}")

            # Keep only the 10 newest samples per dog to bound memory.
            gallery = self.dog_database[best_dog_id]
            gallery.append(sample)
            if len(gallery) > 10:
                self.dog_database[best_dog_id] = gallery[-10:]

            # Update tracking maps
            self.track_to_dog[track.track_id] = best_dog_id
            self.dog_last_seen[best_dog_id] = self.current_frame

            # Update confidence history (bounded to the last 10 scores)
            history = self.track_confidence_history.setdefault(track.track_id, [])
            history.append(best_score)
            if len(history) > 10:
                self.track_confidence_history[track.track_id] = history[-10:]

            return best_dog_id, best_score

        # NO MATCH - create new dog
        new_dog_id = self.next_dog_id
        self.next_dog_id += 1

        if self.debug:
            print(f" 🆕 NEW DOG created: Dog {new_dog_id}")

        self.dog_database[new_dog_id] = [sample]

        # Update tracking maps
        self.track_to_dog[track.track_id] = new_dog_id
        self.dog_last_seen[new_dog_id] = self.current_frame
        self.track_confidence_history[track.track_id] = [1.0]

        return new_dog_id, 1.0

    def match_or_register_all(self, track) -> Dict:
        """Run ``match_or_register`` and wrap the result per model name."""
        dog_id, confidence = self.match_or_register(track)

        return {
            'ResNet50': {
                'dog_id': dog_id,
                'confidence': confidence,
                'processing_time': 0
            }
        }

    def set_all_thresholds(self, threshold: float):
        """Set the similarity threshold, clamped to ``[0.3, 0.95]``."""
        old_threshold = self.similarity_threshold
        self.similarity_threshold = max(0.3, min(0.95, threshold))

        print(f"📊 ReID threshold changed: {old_threshold:.2f} -> {self.similarity_threshold:.2f}")

        # Provide guidance based on threshold
        if self.similarity_threshold < 0.5:
            print(" ⚠️ LOW threshold - may merge different dogs")
        elif self.similarity_threshold > 0.75:
            print(" ⚠️ HIGH threshold - may split same dog")
        else:
            print(" ✅ Balanced threshold")

    def reset_all(self):
        """Forget every dog and track and restart the ID and frame counters."""
        self.dog_database.clear()
        self.track_to_dog.clear()
        self.track_confidence_history.clear()
        self.dog_last_seen.clear()
        self.next_dog_id = 1
        self.current_frame = 0
        print("🔄 ReID system reset - all dogs cleared")

    def get_statistics(self) -> Dict:
        """Return summary counters keyed under ``'ResNet50'``."""
        # Active dogs: seen within the last 150 counter ticks.
        active_dogs = sum(
            1 for last_frame in self.dog_last_seen.values()
            if self.current_frame - last_frame < 150
        )

        # Average number of stored samples per dog
        if self.dog_database:
            avg_features = float(np.mean([len(samples) for samples in self.dog_database.values()]))
        else:
            avg_features = 0

        stats = {
            'ResNet50': {
                'total_dogs': self.next_dog_id - 1,
                'dogs_in_database': len(self.dog_database),
                'active_dogs': active_dogs,
                'avg_features_per_dog': avg_features,
                'threshold': self.similarity_threshold,
                'current_frame': self.current_frame
            }
        }

        return stats

    def set_debug_mode(self, debug: bool):
        """Enable/disable debug output."""
        self.debug = debug
        print(f"Debug mode: {'ON' if debug else 'OFF'}")
|
| 317 |
|
| 318 |
+
# Compatibility aliases so callers importing the older class names keep working.
ImprovedResNet50ReID = SingleModelReID
DualModelReID = SingleModelReID
|
|
|