File size: 9,587 Bytes
877e000 0e5c14c 877e000 ebb3d5e 877e000 6e3dbdb ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 9860c76 ebb3d5e 6e3dbdb 9860c76 ebb3d5e 6e3dbdb 9860c76 6e3dbdb ebb3d5e 0e5c14c ebb3d5e 6e3dbdb ebb3d5e 6e3dbdb ebb3d5e 6e3dbdb 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c 6e3dbdb ebb3d5e 6e3dbdb 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c ebb3d5e 0e5c14c 6e3dbdb 9860c76 ebb3d5e 9860c76 6e3dbdb 1049797 877e000 6e3dbdb ebb3d5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
# models/trust_score.py
from .model_loader import load_model
from .logging_config import logger
import re
# Keyword tables used by generate_trust_score. All of them are matched by
# plain substring containment against the lower-cased listing text.
_POSITIVE_INDICATORS = (
    'apartment', 'flat', 'house', 'villa', 'bungalow', 'property', 'real estate',
    'bedroom', 'bathroom', 'kitchen', 'living', 'dining', 'balcony', 'parking',
    'amenities', 'facilities', 'security', 'lift', 'gym', 'pool', 'garden',
    'hyderabad', 'mumbai', 'delhi', 'bangalore', 'chennai', 'kolkata', 'pune',
    'verified', 'authentic', 'genuine', 'legitimate', 'original', 'certified',
    'pg', 'hostel', 'office', 'commercial', 'retail', 'warehouse', 'industrial',
)
_NEGATIVE_INDICATORS = (
    'fake', 'fraud', 'scam', 'suspicious', 'doubtful', 'unverified', 'unauthentic',
    'illegal', 'unauthorized', 'forged', 'counterfeit', 'bogus', 'phony',
)
_MAJOR_CITIES = ('hyderabad', 'mumbai', 'delhi', 'bangalore')
_PROPERTY_TYPES = ('apartment', 'flat', 'house', 'villa', 'bungalow', 'pg', 'office')
_AMENITIES = ('pool', 'gym', 'garden', 'parking', 'security', 'lift', 'balcony')
_NUMERIC_FIELDS = ('property_name', 'bedrooms', 'bathrooms', 'sq_ft', 'market_value')

# The whole (stripped) text is nothing but a 1-3 digit number.
_SHORT_NUMBER_RE = re.compile(r'\d{1,3}')
# Stand-alone integers anywhere in the text.
_NUMBER_RE = re.compile(r'\b\d+\b')
# Stand-alone one- or two-digit integers.
_SMALL_NUMBER_RE = re.compile(r'\b\d{1,2}\b')
# A price of exactly 1 or 2 rupees: the digit must not continue into a larger
# amount ("₹1,50,00,000", "₹15000", "₹1.5") and must not be scaled by a unit
# ("₹2 crore", "₹1 lakh", "₹2k").
_LOW_PRICE_RE = re.compile(
    r'₹[12](?!\d|[,.]\d)(?!\s*(?:k|l|lakhs?|lacs?|cr|crores?|million|mn)\b)'
)
# An area of exactly 1 or 2 "sq ..." units: the digit must not be the tail of
# a longer number ("1202 sq ft", "3.2 sq m").
_TINY_SIZE_RE = re.compile(r'(?<![\d.,])[12] sq')


def _count_property_related(analysis):
    """Return how many entries of *analysis* are flagged ``is_property_related``.

    *analysis* may be a single dict-like result or an iterable of them.
    Entries without a ``.get`` method are ignored instead of raising.
    """
    if hasattr(analysis, 'get'):
        # A single result object: treat it as a one-element collection
        # rather than iterating over its keys.
        entries = [analysis]
    else:
        try:
            entries = list(analysis)
        except TypeError:
            entries = []
    return sum(
        1
        for entry in entries
        if hasattr(entry, 'get') and entry.get('is_property_related', False)
    )


def generate_trust_score(text, image_analysis, pdf_analysis):
    """Score how trustworthy a property listing looks, on a 0-100 scale.

    The score starts at 50 and is adjusted by keyword heuristics on the
    listing text and by the number of image / document analysis entries
    flagged as property-related.

    Args:
        text: Listing description. ``None`` is treated as an empty
            description; any other non-string value is converted with
            ``str()``.
        image_analysis: A dict-like analysis result or an iterable of them.
            Only the ``'is_property_related'`` key is read. Falsy values
            (``None``, ``[]``) mean "no images supplied" and neither add nor
            subtract points.
        pdf_analysis: Same shape and handling as ``image_analysis``, for
            documents.

    Returns:
        A ``(trust_score, reasoning)`` tuple. ``trust_score`` is a float
        clamped to ``[0.0, 100.0]``; ``reasoning`` is a sentence-joined list
        of the heuristics that fired. If an unexpected error occurs it is
        logged and ``(35.0, "Trust analysis failed: ...")`` is returned; this
        function does not raise.
    """
    try:
        score = 50.0
        reasons = []

        # Normalise once so every later check works on the same string and
        # None / non-string input cannot raise halfway through.
        raw_text = '' if text is None else str(text)
        text_lower = raw_text.lower()
        fake_detected = False

        # --- Fake-data heuristics -------------------------------------
        if _SHORT_NUMBER_RE.fullmatch(raw_text.strip()):
            fake_detected = True
            score -= 10
            reasons.append("Detected suspicious number patterns")

        # Five or more numbers that are all the same value ("2, 2, 2, 2, 2").
        numbers = _NUMBER_RE.findall(text_lower)
        if len(numbers) >= 5 and len(set(numbers)) <= 1:
            fake_detected = True
            score -= 15
            reasons.append("Detected repeated number patterns (likely fake data)")

        if _LOW_PRICE_RE.search(text_lower):
            fake_detected = True
            score -= 20
            reasons.append("Detected suspiciously low pricing")

        if _TINY_SIZE_RE.search(text_lower):
            fake_detected = True
            score -= 15
            reasons.append("Detected suspiciously small property size")

        # --- Keyword indicators ---------------------------------------
        positive_count = sum(1 for word in _POSITIVE_INDICATORS if word in text_lower)
        negative_count = sum(1 for word in _NEGATIVE_INDICATORS if word in text_lower)

        # Positive keywords are not credited once the text looks fabricated.
        if positive_count > 0 and not fake_detected:
            score += min(25, positive_count * 4)
            reasons.append(f"Found {positive_count} positive trust indicators")

        if negative_count > 0:
            score -= min(20, negative_count * 4)
            reasons.append(f"Found {negative_count} negative trust indicators")

        # --- Image analysis -------------------------------------------
        if image_analysis:
            related_images = _count_property_related(image_analysis)
            if related_images > 0:
                score += min(20, related_images * 5)
                reasons.append(f"Property has {related_images} property-related images")
            else:
                score -= 10
                reasons.append("No property-related images detected")

            if related_images >= 3:
                score += 12
                reasons.append("Multiple property images provided")

        # --- Document analysis ----------------------------------------
        if pdf_analysis:
            related_docs = _count_property_related(pdf_analysis)
            if related_docs > 0:
                score += min(20, related_docs * 6)
                reasons.append(f"Property has {related_docs} property-related documents")
            else:
                score -= 8
                reasons.append("No property-related documents detected")

            if related_docs >= 2:
                score += 8
                reasons.append("Multiple supporting documents provided")

        # --- Description length ---------------------------------------
        # Length bonuses are withheld for suspected fake data; the penalty
        # for a very short description applies regardless.
        text_length = len(raw_text)
        if text_length > 200 and not fake_detected:
            score += 15
            reasons.append("Detailed property description provided")
        elif text_length > 100 and not fake_detected:
            score += 10
            reasons.append("Adequate property description provided")
        elif text_length < 50:
            score -= 10
            reasons.append("Very short property description")

        # --- Location, property type, amenities -----------------------
        if not fake_detected:
            if any(city in text_lower for city in _MAJOR_CITIES):
                score += 8
                reasons.append("Property in major city")

            if any(prop_type in text_lower for prop_type in _PROPERTY_TYPES):
                score += 6
                reasons.append("Clear property type mentioned")

            amenities_count = sum(1 for amenity in _AMENITIES if amenity in text_lower)
            if amenities_count > 0:
                score += min(12, amenities_count * 3)
                reasons.append(f"Property has {amenities_count} amenities mentioned")

        # --- Field-name dump check ------------------------------------
        # Counts how many raw field names appear in the text, but only when
        # the text also contains a stand-alone 1-2 digit number somewhere.
        # NOTE(review): this does not tie the number to a particular field.
        if _SMALL_NUMBER_RE.search(text_lower):
            numeric_count = sum(1 for field in _NUMERIC_FIELDS if field in text_lower)
        else:
            numeric_count = 0

        if numeric_count >= 4:
            fake_detected = True
            score -= 25
            reasons.append("Multiple fields contain only numbers (highly suspicious)")

        # Listings that supplied any image or document analysis never drop
        # below 20.
        if score < 20 and (image_analysis or pdf_analysis):
            score = 20.0

        # Clamp with float bounds so the result type is always float.
        score = max(0.0, min(100.0, score))

        if reasons:
            reasoning = ". ".join(reasons) + "."
        else:
            reasoning = "Basic trust assessment completed."

        return score, reasoning

    except Exception as e:
        # Top-level boundary: callers rely on always getting a
        # (score, reasoning) tuple back, so log and return a neutral result.
        logger.error(f"Error in trust score generation: {str(e)}")
        return 35.0, f"Trust analysis failed: {str(e)}"
|