# models/trust_score.py

import re

from .model_loader import load_model
from .logging_config import logger

# Keywords whose presence in the listing text raises the score.
# NOTE(review): matching is by substring, so short entries such as 'pg' also
# match inside longer words (e.g. 'upgrade').
_POSITIVE_INDICATORS = (
    'apartment', 'flat', 'house', 'villa', 'bungalow', 'property', 'real estate',
    'bedroom', 'bathroom', 'kitchen', 'living', 'dining', 'balcony', 'parking',
    'amenities', 'facilities', 'security', 'lift', 'gym', 'pool', 'garden',
    'hyderabad', 'mumbai', 'delhi', 'bangalore', 'chennai', 'kolkata', 'pune',
    'verified', 'authentic', 'genuine', 'legitimate', 'original', 'certified',
    'pg', 'hostel', 'office', 'commercial', 'retail', 'warehouse', 'industrial',
)

# Keywords whose presence in the listing text lowers the score.
_NEGATIVE_INDICATORS = (
    'fake', 'fraud', 'scam', 'suspicious', 'doubtful', 'unverified', 'unauthentic',
    'illegal', 'unauthorized', 'forged', 'counterfeit', 'bogus', 'phony',
)

_MAJOR_CITIES = ('hyderabad', 'mumbai', 'delhi', 'bangalore')
_PROPERTY_TYPES = ('apartment', 'flat', 'house', 'villa', 'bungalow', 'pg', 'office')
_AMENITIES = ('pool', 'gym', 'garden', 'parking', 'security', 'lift', 'balcony')
_NUMERIC_FIELDS = ('property_name', 'bedrooms', 'bathrooms', 'sq_ft', 'market_value')

# A price of exactly 1 or 2 rupees. The look-aheads reject larger amounts that
# merely start with that digit ("₹1,20,00,000", "₹25,000", "₹1.5") and amounts
# scaled by a magnitude word ("₹2 crore", "₹1 lakh", "₹2k").
_TINY_PRICE_RE = re.compile(
    r'₹[12](?![.,]?\d)(?!\s*(?:crores?|cr|lakhs?|lacs?|k)\b)'
)

# An area of exactly 1 or 2 "sq ...". The look-behinds reject larger or
# fractional numbers that merely end with that digit ("1201 sq", "1.2 sq").
_TINY_AREA_RE = re.compile(r'(?<!\d)(?<!\d[.,])[12] sq')

# Any standalone one- or two-digit number.
_SMALL_NUMBER_RE = re.compile(r'\b\d{1,2}\b')


def _count_property_related(analysis):
    """Count analysis entries whose 'is_property_related' value is truthy.

    *analysis* may be a list/tuple of result dicts or a single result dict;
    entries that are not dicts are ignored instead of raising.
    """
    entries = analysis if isinstance(analysis, (list, tuple)) else [analysis]
    return sum(
        1
        for entry in entries
        if isinstance(entry, dict) and entry.get('is_property_related', False)
    )


def generate_trust_score(text, image_analysis, pdf_analysis):
    """Compute a heuristic trust score for a property listing.

    The score starts at 50 and is adjusted by keyword, length and number
    heuristics on *text* and by how many image/document analysis entries are
    flagged as property-related. The result is clamped to the range 0-100.

    Args:
        text: Listing text. ``None`` is treated as an empty string and any
            other non-string value is converted with ``str()``.
        image_analysis: A list of dicts (or a single dict) that may carry an
            ``'is_property_related'`` key; falsy values mean "no images".
        pdf_analysis: Same shape as *image_analysis*, for documents.

    Returns:
        A ``(trust_score, reasoning)`` tuple: the score as a float and a
        sentence-style summary of the adjustments that were applied. If an
        unexpected error occurs, the error is logged and
        ``(35.0, "Trust analysis failed: ...")`` is returned instead.
    """
    try:
        trust_score = 50.0
        reasoning_parts = []

        # Normalise once so every later check works on a real string; the
        # previous mix of str(text) and raw text made None/numeric input
        # always end in the error fallback.
        text = '' if text is None else str(text)
        text_lower = text.lower()

        # --- Obvious fake-data patterns --------------------------------
        fake_detected = False

        # The whole description is just a number of at most three digits.
        if re.fullmatch(r'\d{1,3}', text.strip()):
            fake_detected = True
            trust_score -= 10
            reasoning_parts.append("Detected suspicious number patterns")

        # Five or more numbers that are all identical (e.g. "2, 2, 2, 2, 2").
        numbers = re.findall(r'\b\d+\b', text_lower)
        if len(numbers) >= 5 and len(set(numbers)) <= 1:
            fake_detected = True
            trust_score -= 15
            reasoning_parts.append("Detected repeated number patterns (likely fake data)")

        if _TINY_PRICE_RE.search(text_lower):
            fake_detected = True
            trust_score -= 20
            reasoning_parts.append("Detected suspiciously low pricing")

        if _TINY_AREA_RE.search(text_lower):
            fake_detected = True
            trust_score -= 15
            reasoning_parts.append("Detected suspiciously small property size")

        # --- Keyword indicators ----------------------------------------
        negative_count = sum(1 for term in _NEGATIVE_INDICATORS if term in text_lower)

        # Blank out negative terms before counting positives so that
        # "unverified"/"unauthentic" are not also credited as
        # "verified"/"authentic".
        positive_text = text_lower
        for term in _NEGATIVE_INDICATORS:
            positive_text = positive_text.replace(term, ' ')
        positive_count = sum(1 for term in _POSITIVE_INDICATORS if term in positive_text)

        if positive_count > 0 and not fake_detected:
            trust_score += min(25, positive_count * 4)
            reasoning_parts.append(f"Found {positive_count} positive trust indicators")

        if negative_count > 0:
            trust_score -= min(20, negative_count * 4)
            reasoning_parts.append(f"Found {negative_count} negative trust indicators")

        # --- Image analysis contribution -------------------------------
        if image_analysis:
            property_related_count = _count_property_related(image_analysis)
            if property_related_count > 0:
                trust_score += min(20, property_related_count * 5)
                reasoning_parts.append(
                    f"Property has {property_related_count} property-related images"
                )
            else:
                trust_score -= 10
                reasoning_parts.append("No property-related images detected")

            # Extra credit for several property-related images.
            if property_related_count >= 3:
                trust_score += 12
                reasoning_parts.append("Multiple property images provided")

        # --- PDF analysis contribution ---------------------------------
        if pdf_analysis:
            property_related_docs = _count_property_related(pdf_analysis)
            if property_related_docs > 0:
                trust_score += min(20, property_related_docs * 6)
                reasoning_parts.append(
                    f"Property has {property_related_docs} property-related documents"
                )
            else:
                trust_score -= 8
                reasoning_parts.append("No property-related documents detected")

            # Extra credit for more than one property-related document.
            if property_related_docs >= 2:
                trust_score += 8
                reasoning_parts.append("Multiple supporting documents provided")

        # --- Description length ----------------------------------------
        text_length = len(text)
        if text_length > 200 and not fake_detected:
            trust_score += 15
            reasoning_parts.append("Detailed property description provided")
        elif text_length > 100 and not fake_detected:
            trust_score += 10
            reasoning_parts.append("Adequate property description provided")
        elif text_length < 50:
            trust_score -= 10
            reasoning_parts.append("Very short property description")

        # --- Location, property type and amenities ---------------------
        if not fake_detected and any(city in text_lower for city in _MAJOR_CITIES):
            trust_score += 8
            reasoning_parts.append("Property in major city")

        if not fake_detected and any(kind in text_lower for kind in _PROPERTY_TYPES):
            trust_score += 6
            reasoning_parts.append("Clear property type mentioned")

        amenities_count = sum(1 for amenity in _AMENITIES if amenity in text_lower)
        if amenities_count > 0 and not fake_detected:
            trust_score += min(12, amenities_count * 3)
            reasoning_parts.append(f"Property has {amenities_count} amenities mentioned")

        # --- Raw field names next to small numbers ---------------------
        # Counts how many raw field names appear in the text, but only when
        # the text also contains a one- or two-digit number somewhere.
        # NOTE(review): this runs after the bonuses above, so it only
        # subtracts; it does not retract bonuses that were already granted.
        numeric_count = 0
        if _SMALL_NUMBER_RE.search(text_lower):
            numeric_count = sum(1 for field in _NUMERIC_FIELDS if field in text_lower)
        if numeric_count >= 4:
            trust_score -= 25
            reasoning_parts.append("Multiple fields contain only numbers (highly suspicious)")

        # Listings that supplied any image/document analysis never drop
        # below 20.
        if trust_score < 20 and (image_analysis or pdf_analysis):
            trust_score = 20.0

        # Keep the score within 0-100.
        trust_score = max(0.0, min(100.0, trust_score))

        if reasoning_parts:
            reasoning = ". ".join(reasoning_parts) + "."
        else:
            reasoning = "Basic trust assessment completed."

        return trust_score, reasoning

    except Exception as e:
        # Deliberate best-effort boundary: scoring must not crash the caller,
        # so log the problem and return a neutral-low fallback score.
        logger.error(f"Error in trust score generation: {e}")
        return 35.0, f"Trust analysis failed: {e}"