# models/image_analysis.py
"""Image analysis helpers for real estate listing verification.

At import time this module tries to load an image classification model from a
list of candidates (first one that loads wins). ``analyze_image`` then uses
that model, or a static fallback when no model could be loaded.
"""

import numpy as np
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

from .logging_config import logger

# Module-level model state, populated by the loading loop below.
has_model = False
processor = None
model = None
model_used = "static_fallback"

try:
    # Candidates are tried in order; the first that loads is used.
    model_options = [
        "andupets/real-estate-image-classification",  # Specialized real estate model
        "microsoft/resnet-50",  # General purpose
        "google/vit-base-patch16-224",  # Alternative
        "microsoft/resnet-18",  # Smaller
    ]

    for model_name in model_options:
        try:
            logger.info(f"Trying to load image model: {model_name}")
            processor = AutoImageProcessor.from_pretrained(model_name)
            model = AutoModelForImageClassification.from_pretrained(model_name)

            # Move to GPU if available
            if torch.cuda.is_available():
                model = model.to('cuda')
                logger.info(f"Model loaded on GPU: {model_name}")
            else:
                logger.info(f"Model loaded on CPU: {model_name}")

            model.eval()  # Inference only: disable dropout / batch-norm updates
            has_model = True
            model_used = model_name
            logger.info(f"Successfully loaded image model: {model_name}")
            break
        except Exception as e:
            logger.warning(f"Failed to load {model_name}: {str(e)}")
            continue

    if not has_model:
        logger.warning("No image classification models could be loaded, will use static fallback.")
        model_used = "static_fallback"

except Exception as e:
    logger.error(f"Error loading image classification models: {str(e)}")
    has_model = False
    model_used = "static_fallback"


def analyze_image(image):
    """
    Analyze a single image for real estate verification.

    The input is never modified: the function works on an RGB copy. The
    classifier sees a copy downsized to at most 512 px on the longest side,
    while the quality assessment and the AI-generation heuristics look at the
    full-size RGB copy (so resolution-based checks see the real dimensions).

    Args:
        image: PIL Image object, file path, or file-like object readable by
            ``PIL.Image.open``.

    Returns:
        dict: Analysis results with the keys ``is_property_related``,
        ``predicted_label``, ``confidence``, ``authenticity_score``,
        ``is_ai_generated``, ``image_quality``, ``top_predictions``,
        ``real_estate_confidence`` and ``model_used``. An ``error`` key is
        added when model inference or the whole analysis failed. The function
        does not raise; failures are logged and reported in the result.
    """
    try:
        if isinstance(image, Image.Image):
            # convert() always returns a new image (a copy when the mode is
            # already RGB), so the caller's object is left untouched.
            image = image.convert('RGB')
        else:
            # Path or file-like object. convert() forces the pixel data to be
            # read, so the underlying file can be released right away.
            with Image.open(image) as opened:
                image = opened.convert('RGB')

        # Downsize only the copy that is fed to the model.
        max_size = 512
        if max(image.size) > max_size:
            model_image = image.copy()
            model_image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
        else:
            model_image = image

        # Initialize analysis results
        analysis_result = {
            'is_property_related': False,
            'predicted_label': "Unknown",
            'confidence': 0.0,
            'authenticity_score': 0.0,
            'is_ai_generated': False,
            'image_quality': {
                'resolution': f"{image.size[0]}x{image.size[1]}",
                'quality_score': 0.0
            },
            'top_predictions': [],
            'real_estate_confidence': 0.0,
            'model_used': model_used
        }

        if has_model and processor is not None and model is not None:
            try:
                # Prepare image for model
                inputs = processor(images=model_image, return_tensors="pt")

                # Move inputs to same device as model
                if torch.cuda.is_available():
                    inputs = {k: v.to('cuda') for k, v in inputs.items()}

                # Get predictions
                with torch.no_grad():
                    outputs = model(**inputs)
                    logits = outputs.logits
                    probs = torch.softmax(logits, dim=1).detach().cpu().numpy()[0]

                # Indices of the five most probable classes, best first
                top_indices = [int(i) for i in np.argsort(probs)[::-1][:5]]

                # Map class indices to labels; fall back to a generic name
                # when the config has no entry for an index.
                id2label = getattr(model.config, 'id2label', None) or {}
                labels = [id2label.get(i, f"class_{i}") for i in top_indices]

                # Create top predictions list
                analysis_result['top_predictions'] = [
                    {
                        'label': label,
                        'confidence': float(probs[i])
                    }
                    for i, label in zip(top_indices, labels)
                ]

                # Top prediction. Converted to plain Python types so that the
                # result dict stays JSON-serializable.
                max_prob = float(probs[top_indices[0]])
                predicted_label = labels[0]

                # Determine if it's real estate related
                real_estate_keywords = [
                    'bathroom', 'bedroom', 'dining room', 'house facade', 'kitchen', 'living room',
                    'apartment', 'facade', 'real estate', 'property', 'interior', 'exterior',
                    'room', 'home', 'house', 'flat', 'villa'
                ]

                def mentions_real_estate(label):
                    lowered = label.lower()
                    return any(keyword in lowered for keyword in real_estate_keywords)

                is_real_estate = mentions_real_estate(predicted_label)

                # The specialized model only predicts real estate classes, so
                # a modest confidence is enough to accept its prediction.
                if "real-estate" in model_used.lower():
                    is_real_estate = max_prob > 0.3

                # Calculate real estate confidence
                if is_real_estate:
                    real_estate_confidence = max_prob
                else:
                    # Best score among the top predictions whose label
                    # contains a real estate keyword, or 0.0 when none does.
                    real_estate_confidence = max(
                        (
                            pred['confidence']
                            for pred in analysis_result['top_predictions']
                            if mentions_real_estate(pred['label'])
                        ),
                        default=0.0,
                    )

                # Update analysis result
                analysis_result.update({
                    'is_property_related': bool(is_real_estate),
                    'predicted_label': predicted_label,
                    'confidence': max_prob,
                    'real_estate_confidence': float(real_estate_confidence),
                    'authenticity_score': 0.95 if max_prob > 0.7 else 0.60,
                    'is_ai_generated': bool(
                        detect_ai_generated_image(image, max_prob, predicted_label)
                    )
                })

                # Assess image quality on the full-size image
                analysis_result['image_quality'] = assess_image_quality(image)

            except Exception as e:
                logger.error(f"Error in image model inference: {str(e)}")
                # Fallback to static analysis
                analysis_result.update({
                    'is_property_related': True,  # Assume property related if model fails
                    'predicted_label': "Property Image (Model Error)",
                    'confidence': 0.5,
                    'real_estate_confidence': 0.5,
                    'authenticity_score': 0.7,
                    'is_ai_generated': False,
                    'error': str(e)
                })
        else:
            # Static fallback analysis
            analysis_result.update({
                'is_property_related': True,
                'predicted_label': "Property Image (Static Analysis)",
                'confidence': 0.5,
                'real_estate_confidence': 0.5,
                'authenticity_score': 0.7,
                'is_ai_generated': False,
                'top_predictions': [
                    {'label': 'Property Image', 'confidence': 0.5}
                ]
            })

        return analysis_result

    except Exception as e:
        logger.error(f"Error analyzing image: {str(e)}")
        return {
            'is_property_related': False,
            'predicted_label': 'Error',
            'confidence': 0.0,
            'real_estate_confidence': 0.0,
            'authenticity_score': 0.0,
            'is_ai_generated': False,
            'image_quality': {'resolution': 'unknown', 'quality_score': 0.0},
            'top_predictions': [],
            'model_used': 'static_fallback',
            'error': str(e)
        }


def detect_ai_generated_image(image, confidence, predicted_label):
    """
    Guess whether an image is AI-generated using a few coarse heuristics.

    The image is flagged as soon as any one heuristic fires:

    1. Confidence above 0.95 together with a label longer than 20 characters.
    2. Near-perfect left/right mirror symmetry (mean absolute grayscale
       difference below 5.0).
    3. Grayscale pixel variance below 100.
    4. Width and height both multiples of 64.
    5. No EXIF metadata.

    Args:
        image: PIL Image object.
        confidence: Classifier confidence of the top prediction.
        predicted_label: Label of the top prediction.

    Returns:
        bool: True if any heuristic fires, False otherwise. Also False when
        the check itself fails (the error is logged as a warning).
    """
    try:
        # Heuristic 1: Unusually high confidence with a long label
        if confidence > 0.95 and len(predicted_label) > 20:
            return True

        # Heuristic 2: Check for near-perfect horizontal symmetry.
        # Cast to a signed type first: subtracting uint8 arrays wraps around
        # modulo 256 and would corrupt the difference.
        gray_array = np.array(image.convert('L')).astype(np.int16)

        _, w = gray_array.shape
        if w > 1:  # Need at least one column on each side
            center = w // 2
            left_half = gray_array[:, :center]
            # Last `center` columns; for odd widths this skips the middle
            # column so both halves have the same shape.
            right_half_flipped = np.fliplr(gray_array[:, w - center:])

            symmetry_score = np.mean(np.abs(left_half - right_half_flipped))
            if symmetry_score < 5.0:  # Threshold for near-perfect symmetry
                return True

        # Heuristic 3: Very uniform texture
        texture_variance = np.var(gray_array)
        if texture_variance < 100:
            return True

        # Heuristic 4: Dimensions that are multiples of 64
        # NOTE(review): many camera / export sizes (e.g. 1024x768) also match
        # this; threshold kept as in the original.
        width, height = image.size
        if width % 64 == 0 and height % 64 == 0:
            return True

        # Heuristic 5: No EXIF metadata. The public getexif() works on any
        # PIL image, including ones produced by convert(), unlike the private
        # _getexif() that only some file-format classes define.
        getexif = getattr(image, 'getexif', None)
        exif = getexif() if callable(getexif) else None
        if not exif:
            return True

        return False

    except Exception as e:
        logger.warning(f"Error in AI detection: {str(e)}")
        return False


def assess_image_quality(image):
    """
    Score an image's quality from its pixel count and aspect ratio.

    Args:
        image: Object with a ``size`` attribute of ``(width, height)``, e.g. a
            PIL Image.

    Returns:
        dict: ``resolution`` ("WxH"), ``quality_score`` (clamped to the range
        0.0-1.0), ``total_pixels`` and ``aspect_ratio`` (width / height). On
        any error (including zero height) a fallback dict with
        ``resolution='unknown'`` and ``quality_score=0.0`` is returned and a
        warning is logged.
    """
    try:
        # Get image size
        width, height = image.size
        resolution = f"{width}x{height}"

        # Base score from the pixel count
        total_pixels = width * height
        if total_pixels >= 1000000:  # 1MP or higher
            quality_score = 0.9
        elif total_pixels >= 500000:  # 500K pixels
            quality_score = 0.7
        elif total_pixels >= 100000:  # 100K pixels
            quality_score = 0.5
        else:
            quality_score = 0.3

        # Small bonus for reasonable aspect ratios, small penalty otherwise
        aspect_ratio = width / height
        if 0.5 <= aspect_ratio <= 2.0:
            quality_score += 0.1
        else:
            quality_score -= 0.1

        # Ensure score is between 0 and 1
        quality_score = max(0.0, min(1.0, quality_score))

        return {
            'resolution': resolution,
            'quality_score': quality_score,
            'total_pixels': total_pixels,
            'aspect_ratio': aspect_ratio
        }

    except Exception as e:
        logger.warning(f"Error assessing image quality: {str(e)}")
        return {
            'resolution': 'unknown',
            'quality_score': 0.0,
            'total_pixels': 0,
            'aspect_ratio': 1.0
        }