Spaces:

mustafa2ak
/

Stray_Dogs

Sleeping

App Files Community

mustafa2ak committed on Sep 28

Commit

1b0864d

verified ·

1 Parent(s): 688d84b

Update reid.py

Browse files

Files changed (1) hide show

reid.py +97 -372

reid.py CHANGED Viewed

@@ -1,290 +1,104 @@
 """
-reid.py - Multi-Component ReID System with ResNet, Statistics, and Heuristics
-Includes persistent ID management and configurable component weights
 """
 import numpy as np
 import cv2
 import torch
 import torch.nn as nn
-import torchvision.models as models
-import torchvision.transforms as transforms
 from sklearn.metrics.pairwise import cosine_similarity
 from typing import Dict, List, Optional, Tuple
 from dataclasses import dataclass, field
-import json
-from pathlib import Path
 import warnings
 warnings.filterwarnings('ignore')
 @dataclass
 class DogFeatures:
-    """Enhanced container for dog features"""
-    resnet_features: np.ndarray
-    color_histogram: np.ndarray = None
-    size_info: Dict = field(default_factory=dict)
-    velocity: np.ndarray = None
     confidence: float = 0.5
-    quality_score: float = 0.5
     frame_num: int = 0
-    bbox: List[float] = field(default_factory=list)
-    track_id: int = 0
-class MultiComponentReID:
     """
-    Three-component ReID system with ResNet, Statistics, and Heuristics
     """
-    def __init__(self, device: str = 'cuda', id_offset_file: str = 'reid_state.json'):
         self.device = device if torch.cuda.is_available() else 'cpu'
-        self.id_offset_file = Path(id_offset_file)
-        # Component weights (will be set by sliders)
-        self.component_weights = {
-            'resnet': 0.70,      # 70% default
-            'statistics': 0.20,  # 20% default
-            'heuristics': 0.10   # 10% default
-        }
-        # Similarity thresholds
         self.base_threshold = 0.60
-        self.adaptive_threshold = True
-        # Dog database
         self.dog_database = {}  # dog_id -> list of DogFeatures
-        self.next_dog_id = self._load_id_offset()
-        # Tracking maps
-        self.track_to_dog = {}
-        self.dog_statistics = {}  # dog_id -> statistical features
-        self.dog_last_seen = {}
-        self.dog_entrance_point = {}  # dog_id -> first seen location
-        self.dog_trajectory = {}  # dog_id -> list of positions
         self.current_frame = 0
-        self.frame_width = 640
-        self.frame_height = 480
-        # Initialize ResNet
-        self._initialize_resnet()
-        print(f"✅ Multi-Component ReID initialized")
-        print(f"   Device: {self.device}")
-        print(f"   Starting Dog ID: {self.next_dog_id}")
-        print(f"   Components: ResNet, Statistics, Heuristics")
-    def _load_id_offset(self) -> int:
-        """Load the last used dog ID from persistent storage"""
-        if self.id_offset_file.exists():
-            try:
-                with open(self.id_offset_file, 'r') as f:
-                    data = json.load(f)
-                    return data.get('next_dog_id', 1)
-            except:
-                pass
-        return 1
-    def _save_id_offset(self):
-        """Save the current dog ID counter"""
-        try:
-            with open(self.id_offset_file, 'w') as f:
-                json.dump({'next_dog_id': self.next_dog_id}, f)
-        except:
-            pass
-    def _initialize_resnet(self):
-        """Initialize ResNet50 for feature extraction"""
         try:
-            resnet = models.resnet50(weights='IMAGENET1K_V1')
-            self.resnet_model = nn.Sequential(*list(resnet.children())[:-1])
-            self.resnet_model.to(self.device).eval()
-            self.transform = transforms.Compose([
-                transforms.ToPILImage(),
-                transforms.Resize((224, 224)),
-                transforms.ToTensor(),
-                transforms.Normalize(
-                    mean=[0.485, 0.456, 0.406],
-                    std=[0.229, 0.224, 0.225]
-                )
-            ])
         except Exception as e:
-            print(f"❌ ResNet initialization error: {e}")
-            self.resnet_model = None
-    def set_component_weights(self, resnet: float, statistics: float, heuristics: float):
-        """
-        Set component weights from UI sliders (0-100 scale)
-        Normalizes to ensure they sum to 1.0
-        """
-        total = resnet + statistics + heuristics
-        if total > 0:
-            self.component_weights['resnet'] = resnet / total
-            self.component_weights['statistics'] = statistics / total
-            self.component_weights['heuristics'] = heuristics / total
-        else:
-            # Default if all are zero
-            self.component_weights = {'resnet': 0.7, 'statistics': 0.2, 'heuristics': 0.1}
-        print(f"📊 Component weights updated:")
-        print(f"   ResNet: {self.component_weights['resnet']:.2%}")
-        print(f"   Statistics: {self.component_weights['statistics']:.2%}")
-        print(f"   Heuristics: {self.component_weights['heuristics']:.2%}")
-    def extract_features(self, image: np.ndarray, bbox: List[float] = None,
-                        track_info: Dict = None) -> Optional[DogFeatures]:
-        """Extract multi-component features"""
-        if image is None or image.size == 0:
             return None
-        features = DogFeatures(
-            resnet_features=np.zeros(2048),
-            bbox=bbox if bbox else [0, 0, 100, 100]
-        )
-        # 1. ResNet features
-        if self.resnet_model is not None:
-            try:
-                img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-                img_tensor = self.transform(img_rgb).unsqueeze(0).to(self.device)
-                with torch.no_grad():
-                    resnet_feat = self.resnet_model(img_tensor)
-                    resnet_feat = resnet_feat.squeeze().cpu().numpy()
-                    features.resnet_features = resnet_feat / (np.linalg.norm(resnet_feat) + 1e-7)
-            except:
-                pass
-        # 2. Color histogram (for heuristics)
         try:
-            # Calculate color histogram in HSV space
-            hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
-            hist_h = cv2.calcHist([hsv], [0], None, [30], [0, 180])
-            hist_s = cv2.calcHist([hsv], [1], None, [32], [0, 256])
-            hist_v = cv2.calcHist([hsv], [2], None, [32], [0, 256])
-            color_hist = np.concatenate([hist_h.flatten(), hist_s.flatten(), hist_v.flatten()])
-            features.color_histogram = color_hist / (np.sum(color_hist) + 1e-7)
-        except:
-            features.color_histogram = np.zeros(94)  # 30+32+32
-        # 3. Size information (for statistics)
-        if bbox:
-            features.size_info = {
-                'width': bbox[2] - bbox[0],
-                'height': bbox[3] - bbox[1],
-                'aspect_ratio': (bbox[2] - bbox[0]) / max(1, bbox[3] - bbox[1]),
-                'area': (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
-            }
-        # 4. Motion information (if available)
-        if track_info:
-            features.velocity = track_info.get('velocity', np.array([0, 0]))
-            features.track_id = track_info.get('track_id', 0)
-        features.frame_num = self.current_frame
-        return features
-    def _calculate_resnet_similarity(self, feat1: DogFeatures, feat2: DogFeatures) -> float:
-        """Calculate ResNet-based similarity"""
-        if feat1.resnet_features is None or feat2.resnet_features is None:
-            return 0.0
-        return cosine_similarity(
-            feat1.resnet_features.reshape(1, -1),
-            feat2.resnet_features.reshape(1, -1)
-        )[0, 0]
-    def _calculate_statistical_similarity(self, dog_id: int, new_features: DogFeatures) -> float:
-        """Calculate statistics-based similarity"""
-        if dog_id not in self.dog_statistics:
-            return 0.5  # Neutral score if no statistics
-        stats = self.dog_statistics[dog_id]
-        score = 0.0
-        weights_sum = 0.0
-        # Size consistency
-        if 'avg_size' in stats and new_features.size_info:
-            size_diff = abs(new_features.size_info['area'] - stats['avg_size'])
-            size_score = max(0, 1 - size_diff / (stats['avg_size'] + 1e-7))
-            score += size_score * 0.3
-            weights_sum += 0.3
-        # Aspect ratio consistency
-        if 'avg_aspect' in stats and new_features.size_info:
-            aspect_diff = abs(new_features.size_info['aspect_ratio'] - stats['avg_aspect'])
-            aspect_score = max(0, 1 - aspect_diff / 2)
-            score += aspect_score * 0.2
-            weights_sum += 0.2
-        # Velocity consistency
-        if 'avg_velocity' in stats and new_features.velocity is not None:
-            vel_magnitude_old = np.linalg.norm(stats['avg_velocity'])
-            vel_magnitude_new = np.linalg.norm(new_features.velocity)
-            vel_diff = abs(vel_magnitude_new - vel_magnitude_old)
-            vel_score = max(0, 1 - vel_diff / 50)  # 50 pixels/frame max expected
-            score += vel_score * 0.3
-            weights_sum += 0.3
-        # Confidence pattern
-        if 'avg_confidence' in stats:
-            conf_diff = abs(new_features.confidence - stats['avg_confidence'])
-            conf_score = max(0, 1 - conf_diff)
-            score += conf_score * 0.2
-            weights_sum += 0.2
-        return score / max(weights_sum, 0.1)
-    def _calculate_heuristic_similarity(self, dog_id: int, new_features: DogFeatures) -> float:
-        """Calculate heuristic-based similarity"""
-        score = 0.0
-        weights_sum = 0.0
-        # Temporal proximity (recently seen dogs more likely)
-        if dog_id in self.dog_last_seen:
-            frames_since = self.current_frame - self.dog_last_seen[dog_id]
-            if frames_since < 30:  # Within 1 second at 30fps
-                temporal_score = 1.0 - frames_since / 30
-                score += temporal_score * 0.3
-                weights_sum += 0.3
-        # Spatial coherence (can't teleport)
-        if dog_id in self.dog_trajectory and len(self.dog_trajectory[dog_id]) > 0:
-            last_pos = self.dog_trajectory[dog_id][-1]
-            new_pos = [(new_features.bbox[0] + new_features.bbox[2])/2,
-                      (new_features.bbox[1] + new_features.bbox[3])/2]
-            distance = np.linalg.norm(np.array(new_pos) - np.array(last_pos))
-            max_reasonable_dist = 100  # pixels per frame
-            if distance < max_reasonable_dist:
-                spatial_score = 1.0 - distance / max_reasonable_dist
-                score += spatial_score * 0.4
-                weights_sum += 0.4
-        # Color similarity
-        if dog_id in self.dog_database and new_features.color_histogram is not None:
-            dog_features_list = self.dog_database[dog_id]
-            if dog_features_list and dog_features_list[-1].color_histogram is not None:
-                color_sim = cosine_similarity(
-                    new_features.color_histogram.reshape(1, -1),
-                    dog_features_list[-1].color_histogram.reshape(1, -1)
-                )[0, 0]
-                score += color_sim * 0.3
-                weights_sum += 0.3
-        return score / max(weights_sum, 0.1)
     def match_or_register(self, track, image_crop=None) -> Tuple[int, float]:
-        """
-        Main matching function using all three components
-        """
         self.current_frame += 1
-        # Get detection and extract features
         detection = None
         for det in reversed(track.detections[-3:]):
             if det.image_crop is not None:
@@ -296,15 +110,9 @@ class MultiComponentReID:
             return 0, 0.0
         # Extract features
-        track_info = {
-            'track_id': track.track_id,
-            'velocity': track.velocity if hasattr(track, 'velocity') else np.array([0, 0])
-        }
         features = self.extract_features(
             image_crop,
-            detection.bbox if hasattr(detection, 'bbox') else None,
-            track_info
         )
         if features is None:
@@ -312,120 +120,49 @@ class MultiComponentReID:
         features.confidence = detection.confidence if hasattr(detection, 'confidence') else 0.5
-        # Calculate similarities with all dogs
         best_dog_id = None
         best_score = -1.0
-        for dog_id in self.dog_database:
-            # ResNet similarity
-            resnet_scores = []
-            for dog_feat in self.dog_database[dog_id][-5:]:
-                resnet_scores.append(self._calculate_resnet_similarity(features, dog_feat))
-            resnet_sim = np.mean(resnet_scores) if resnet_scores else 0
-            # Statistical similarity
-            stat_sim = self._calculate_statistical_similarity(dog_id, features)
-            # Heuristic similarity
-            heur_sim = self._calculate_heuristic_similarity(dog_id, features)
-            # Weighted combination
-            total_score = (
-                self.component_weights['resnet'] * resnet_sim +
-                self.component_weights['statistics'] * stat_sim +
-                self.component_weights['heuristics'] * heur_sim
-            )
-            if total_score > best_score:
-                best_score = total_score
-                best_dog_id = dog_id
-        # Determine threshold
-        threshold = self.base_threshold
-        if self.adaptive_threshold:
-            # Adjust based on number of dogs
-            n_dogs = len(self.dog_database)
-            if n_dogs > 5:
-                threshold += 0.05 * min(1, (n_dogs - 5) / 10)
-        # Make decision
-        if best_dog_id is not None and best_score >= threshold:
-            # Match found
-            self._update_dog(best_dog_id, features)
             return best_dog_id, best_score
         else:
             # New dog
-            new_dog_id = self._register_new_dog(features)
             return new_dog_id, 1.0
-    def _update_dog(self, dog_id: int, features: DogFeatures):
-        """Update existing dog with new features"""
-        # Add to database
-        self.dog_database[dog_id].append(features)
-        if len(self.dog_database[dog_id]) > 10:
-            self.dog_database[dog_id] = self.dog_database[dog_id][-10:]
-        # Update statistics
-        if dog_id not in self.dog_statistics:
-            self.dog_statistics[dog_id] = {}
-        stats = self.dog_statistics[dog_id]
-        # Update running averages
-        if features.size_info:
-            stats['avg_size'] = stats.get('avg_size', 0) * 0.9 + features.size_info['area'] * 0.1
-            stats['avg_aspect'] = stats.get('avg_aspect', 1) * 0.9 + features.size_info['aspect_ratio'] * 0.1
-        if features.velocity is not None:
-            if 'avg_velocity' not in stats:
-                stats['avg_velocity'] = features.velocity
-            else:
-                stats['avg_velocity'] = stats['avg_velocity'] * 0.9 + features.velocity * 0.1
-        stats['avg_confidence'] = stats.get('avg_confidence', 0.5) * 0.9 + features.confidence * 0.1
-        # Update tracking
-        self.dog_last_seen[dog_id] = self.current_frame
-        if dog_id not in self.dog_trajectory:
-            self.dog_trajectory[dog_id] = []
-        if features.bbox:
-            center = [(features.bbox[0] + features.bbox[2])/2,
-                     (features.bbox[1] + features.bbox[3])/2]
-            self.dog_trajectory[dog_id].append(center)
-            if len(self.dog_trajectory[dog_id]) > 30:
-                self.dog_trajectory[dog_id] = self.dog_trajectory[dog_id][-30:]
-    def _register_new_dog(self, features: DogFeatures) -> int:
-        """Register a new dog"""
-        new_dog_id = self.next_dog_id
-        self.next_dog_id += 1
-        self._save_id_offset()  # Save the updated counter
-        self.dog_database[new_dog_id] = [features]
-        self.dog_statistics[new_dog_id] = {}
-        self.dog_last_seen[new_dog_id] = self.current_frame
-        if features.bbox:
-            center = [(features.bbox[0] + features.bbox[2])/2,
-                     (features.bbox[1] + features.bbox[3])/2]
-            self.dog_entrance_point[new_dog_id] = center
-            self.dog_trajectory[new_dog_id] = [center]
-        print(f"  🆕 New dog registered: Dog {new_dog_id}")
-        return new_dog_id
     def match_or_register_all(self, track) -> Dict:
-        """Compatible interface for existing code"""
         dog_id, confidence = self.match_or_register(track)
         return {
-            'ResNet50': {
                 'dog_id': dog_id,
-                'confidence': confidence,
-                'processing_time': 0
             }
         }
@@ -435,33 +172,21 @@ class MultiComponentReID:
         print(f"📊 ReID threshold updated to: {self.base_threshold:.2f}")
     def reset_all(self):
-        """Reset for new video but preserve ID counter"""
-        # Clear temporary data
         self.dog_database.clear()
-        self.track_to_dog.clear()
-        self.dog_statistics.clear()
-        self.dog_last_seen.clear()
-        self.dog_trajectory.clear()
-        self.dog_entrance_point.clear()
         self.current_frame = 0
-        # DO NOT reset next_dog_id - preserve it!
-        print(f"🔄 ReID reset - Next dog ID: {self.next_dog_id}")
     def get_statistics(self) -> Dict:
         """Get current statistics"""
         return {
-            'ResNet50': {
-                'total_dogs': len(self.dog_database),
-                'next_dog_id': self.next_dog_id,
-                'threshold': self.base_threshold,
-                'weights': self.component_weights,
-                'current_frame': self.current_frame
-            }
         }
-# Compatibility aliases
-SingleModelReID = MultiComponentReID
-ImprovedResNet50ReID = MultiComponentReID
-DualModelReID = MultiComponentReID

 """
+Simplified ReID with MegaDescriptor-B-224
 """
 import numpy as np
 import cv2
 import torch
 import torch.nn as nn
+import timm
 from sklearn.metrics.pairwise import cosine_similarity
 from typing import Dict, List, Optional, Tuple
 from dataclasses import dataclass, field
 import warnings
 warnings.filterwarnings('ignore')
 @dataclass
 class DogFeatures:
+    """Container for dog features"""
+    features: np.ndarray
+    bbox: List[float] = field(default_factory=list)
     confidence: float = 0.5
     frame_num: int = 0
+class MegaDescriptorReID:
     """
+    Simplified ReID using MegaDescriptor-B-224
     """
+    def __init__(self, device: str = 'cuda'):
         self.device = device if torch.cuda.is_available() else 'cpu'
         self.base_threshold = 0.60
+        # Dog database (temporary only)
         self.dog_database = {}  # dog_id -> list of DogFeatures
+        self.next_dog_id = 1
         self.current_frame = 0
+        # Initialize MegaDescriptor
+        self._initialize_megadescriptor()
+        print(f"✅ MegaDescriptor ReID initialized on {self.device}")
+    def _initialize_megadescriptor(self):
+        """Initialize MegaDescriptor-B-224"""
         try:
+            # Load MegaDescriptor-B-224 (balanced model)
+            self.model = timm.create_model(
+                'hf-hub:BVRA/MegaDescriptor-B-224',
+                pretrained=True
+            )
+            self.model.to(self.device).eval()
+            # Get the preprocessing config
+            data_config = timm.data.resolve_model_data_config(self.model)
+            self.transform = timm.data.create_transform(**data_config, is_training=False)
+            print("✅ MegaDescriptor-B-224 loaded successfully")
         except Exception as e:
+            print(f"❌ MegaDescriptor initialization error: {e}")
+            self.model = None
+    def extract_features(self, image: np.ndarray, bbox: List[float] = None) -> Optional[DogFeatures]:
+        """Extract features using MegaDescriptor"""
+        if image is None or image.size == 0 or self.model is None:
             return None
         try:
+            # Convert BGR to RGB
+            img_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+            # Convert to PIL Image for transform
+            from PIL import Image
+            pil_img = Image.fromarray(img_rgb)
+            # Apply MegaDescriptor transforms
+            img_tensor = self.transform(pil_img).unsqueeze(0).to(self.device)
+            # Extract features
+            with torch.no_grad():
+                features = self.model(img_tensor)
+                features = features.squeeze().cpu().numpy()
+                # L2 normalize
+                features = features / (np.linalg.norm(features) + 1e-7)
+            return DogFeatures(
+                features=features,
+                bbox=bbox if bbox else [0, 0, 100, 100],
+                frame_num=self.current_frame
+            )
+        except Exception as e:
+            print(f"Feature extraction error: {e}")
+            return None
     def match_or_register(self, track, image_crop=None) -> Tuple[int, float]:
+        """Match or register a dog"""
         self.current_frame += 1
+        # Get detection with image
         detection = None
         for det in reversed(track.detections[-3:]):
             if det.image_crop is not None:
             return 0, 0.0
         # Extract features
         features = self.extract_features(
             image_crop,
+            detection.bbox if hasattr(detection, 'bbox') else None
         )
         if features is None:
         features.confidence = detection.confidence if hasattr(detection, 'confidence') else 0.5
+        # Find best match
         best_dog_id = None
         best_score = -1.0
+        for dog_id, dog_features_list in self.dog_database.items():
+            # Calculate similarity with stored features
+            similarities = []
+            for stored_feat in dog_features_list[-5:]:  # Use last 5 features
+                sim = cosine_similarity(
+                    features.features.reshape(1, -1),
+                    stored_feat.features.reshape(1, -1)
+                )[0, 0]
+                similarities.append(sim)
+            if similarities:
+                avg_similarity = np.mean(similarities)
+                if avg_similarity > best_score:
+                    best_score = avg_similarity
+                    best_dog_id = dog_id
+        # Decision: match or new dog
+        if best_dog_id is not None and best_score >= self.base_threshold:
+            # Match found - update database
+            self.dog_database[best_dog_id].append(features)
+            # Keep only last 10 features per dog
+            if len(self.dog_database[best_dog_id]) > 10:
+                self.dog_database[best_dog_id] = self.dog_database[best_dog_id][-10:]
             return best_dog_id, best_score
         else:
             # New dog
+            new_dog_id = self.next_dog_id
+            self.next_dog_id += 1
+            self.dog_database[new_dog_id] = [features]
+            print(f"  🆕 New dog registered: Dog {new_dog_id}")
             return new_dog_id, 1.0
     def match_or_register_all(self, track) -> Dict:
+        """Compatible interface"""
         dog_id, confidence = self.match_or_register(track)
         return {
+            'MegaDescriptor': {
                 'dog_id': dog_id,
+                'confidence': confidence
             }
         }
         print(f"📊 ReID threshold updated to: {self.base_threshold:.2f}")
     def reset_all(self):
+        """Reset for new video"""
         self.dog_database.clear()
+        self.next_dog_id = 1
         self.current_frame = 0
+        print("🔄 ReID reset")
     def get_statistics(self) -> Dict:
         """Get current statistics"""
         return {
+            'total_dogs': len(self.dog_database),
+            'threshold': self.base_threshold
         }
+# Compatibility aliases for existing code
+MultiComponentReID = MegaDescriptorReID
+SingleModelReID = MegaDescriptorReID
+ImprovedResNet50ReID = MegaDescriptorReID