File size: 9,587 Bytes
877e000
 
 
 
0e5c14c
877e000
 
 
ebb3d5e
 
877e000
 
6e3dbdb
 
 
ebb3d5e
0e5c14c
ebb3d5e
 
 
0e5c14c
 
 
 
 
ebb3d5e
 
 
 
 
 
 
 
 
 
 
 
0e5c14c
ebb3d5e
0e5c14c
ebb3d5e
0e5c14c
ebb3d5e
0e5c14c
 
ebb3d5e
 
0e5c14c
ebb3d5e
0e5c14c
 
ebb3d5e
 
0e5c14c
ebb3d5e
9860c76
 
ebb3d5e
6e3dbdb
9860c76
 
 
 
ebb3d5e
 
6e3dbdb
 
 
9860c76
 
6e3dbdb
 
 
 
 
ebb3d5e
0e5c14c
ebb3d5e
6e3dbdb
 
 
ebb3d5e
6e3dbdb
 
ebb3d5e
6e3dbdb
 
 
0e5c14c
 
 
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
 
 
 
ebb3d5e
0e5c14c
6e3dbdb
ebb3d5e
6e3dbdb
 
 
0e5c14c
 
 
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
 
 
 
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
 
ebb3d5e
 
0e5c14c
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
 
 
ebb3d5e
0e5c14c
 
ebb3d5e
0e5c14c
 
 
 
 
 
 
ebb3d5e
0e5c14c
ebb3d5e
0e5c14c
6e3dbdb
9860c76
ebb3d5e
 
9860c76
6e3dbdb
 
 
 
 
 
 
 
 
 
1049797
877e000
6e3dbdb
ebb3d5e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# models/trust_score.py

from .model_loader import load_model
from .logging_config import logger
import re

# Vocabulary whose presence in the description raises the trust score.
_POSITIVE_INDICATORS = (
    'apartment', 'flat', 'house', 'villa', 'bungalow', 'property', 'real estate',
    'bedroom', 'bathroom', 'kitchen', 'living', 'dining', 'balcony', 'parking',
    'amenities', 'facilities', 'security', 'lift', 'gym', 'pool', 'garden',
    'hyderabad', 'mumbai', 'delhi', 'bangalore', 'chennai', 'kolkata', 'pune',
    'verified', 'authentic', 'genuine', 'legitimate', 'original', 'certified',
    'pg', 'hostel', 'office', 'commercial', 'retail', 'warehouse', 'industrial',
)

# Vocabulary whose presence in the description lowers the trust score.
_NEGATIVE_INDICATORS = (
    'fake', 'fraud', 'scam', 'suspicious', 'doubtful', 'unverified', 'unauthentic',
    'illegal', 'unauthorized', 'forged', 'counterfeit', 'bogus', 'phony',
)

_MAJOR_CITIES = ('hyderabad', 'mumbai', 'delhi', 'bangalore')
_PROPERTY_TYPES = ('apartment', 'flat', 'house', 'villa', 'bungalow', 'pg', 'office')
_AMENITIES = ('pool', 'gym', 'garden', 'parking', 'security', 'lift', 'balcony')
_NUMERIC_FIELDS = ('property_name', 'bedrooms', 'bathrooms', 'sq_ft', 'market_value')

# Used to mask negative terms before counting positives, so that e.g.
# "unverified" is not also counted as the positive term "verified".
_NEGATIVE_RE = re.compile('|'.join(re.escape(term) for term in _NEGATIVE_INDICATORS))

# A price of exactly 1 or 2 rupees: the digit must not continue into a larger
# number ("₹1,25,00,000", "₹1.5") and must not carry a magnitude unit
# ("₹2 crore", "₹1 lakh").
_LOW_PRICE_RE = re.compile(
    r'₹\s*[12](?![\d,]|\.\d)(?!\s*(?:crores?|cr|lakhs?|lacs?|million|mn|k|l)\b)'
)

# An area of exactly 1 or 2 "sq ...": the digit must not be the tail of a
# larger number ("1452 sq ft", "1.2 sq").
_TINY_SIZE_RE = re.compile(r'(?<![\d.,])[12]\s*sq')

_NUMBER_RE = re.compile(r'\b\d+\b')
_SHORT_NUMBER_RE = re.compile(r'\b\d{1,2}\b')


def _count_property_related(analysis):
    """Count entries of *analysis* whose 'is_property_related' value is truthy.

    Accepts a list/tuple of dicts or a single dict; entries that are not
    dicts are ignored instead of raising.
    """
    items = analysis if isinstance(analysis, (list, tuple)) else [analysis]
    return sum(
        1 for item in items
        if isinstance(item, dict) and item.get('is_property_related', False)
    )


def generate_trust_score(text, image_analysis, pdf_analysis):
    """Compute a heuristic trust score for a property listing.

    Args:
        text: Listing description. ``None`` is treated as an empty string and
            any other non-string value is converted with ``str()``.
        image_analysis: Falsy for "no images", otherwise a dict or a
            list/tuple of dicts; only the 'is_property_related' key is read.
        pdf_analysis: Same shape as ``image_analysis``, for documents.

    Returns:
        A ``(score, reasoning)`` tuple: ``score`` is a float clamped to
        0-100 and ``reasoning`` is a sentence-joined summary of every rule
        that fired. If an unexpected error occurs it is logged and
        ``(35.0, "Trust analysis failed: ...")`` is returned instead.
    """
    try:
        # Normalise once so every later check can rely on a real string.
        text = "" if text is None else str(text)
        text_lower = text.lower()

        trust_score = 50.0
        reasoning_parts = []
        fake_detected = False

        # A description that is nothing but a 1-3 digit number is placeholder data.
        if re.fullmatch(r'\d{1,3}', text.strip()):
            fake_detected = True
            trust_score -= 10
            reasoning_parts.append("Detected suspicious number patterns")

        # Five or more numbers that are all identical (e.g. "2, 2, 2, 2, 2").
        numbers = _NUMBER_RE.findall(text_lower)
        if len(numbers) >= 5 and len(set(numbers)) <= 1:
            fake_detected = True
            trust_score -= 15
            reasoning_parts.append("Detected repeated number patterns (likely fake data)")

        if _LOW_PRICE_RE.search(text_lower):
            fake_detected = True
            trust_score -= 20
            reasoning_parts.append("Detected suspiciously low pricing")

        if _TINY_SIZE_RE.search(text_lower):
            fake_detected = True
            trust_score -= 15
            reasoning_parts.append("Detected suspiciously small property size")

        # Keyword indicators. Positives are counted on text with the negative
        # terms blanked out; negatives are counted on the full text.
        positive_text = _NEGATIVE_RE.sub(' ', text_lower)
        positive_count = sum(1 for term in _POSITIVE_INDICATORS if term in positive_text)
        negative_count = sum(1 for term in _NEGATIVE_INDICATORS if term in text_lower)

        if positive_count > 0 and not fake_detected:
            trust_score += min(25, positive_count * 4)
            reasoning_parts.append(f"Found {positive_count} positive trust indicators")

        if negative_count > 0:
            trust_score -= min(20, negative_count * 4)
            reasoning_parts.append(f"Found {negative_count} negative trust indicators")

        # Image analysis contribution.
        if image_analysis:
            property_related_count = _count_property_related(image_analysis)
            if property_related_count > 0:
                trust_score += min(20, property_related_count * 5)
                reasoning_parts.append(f"Property has {property_related_count} property-related images")
            else:
                trust_score -= 10
                reasoning_parts.append("No property-related images detected")

            if property_related_count >= 3:
                trust_score += 12
                reasoning_parts.append("Multiple property images provided")

        # Document analysis contribution.
        if pdf_analysis:
            property_related_docs = _count_property_related(pdf_analysis)
            if property_related_docs > 0:
                trust_score += min(20, property_related_docs * 6)
                reasoning_parts.append(f"Property has {property_related_docs} property-related documents")
            else:
                trust_score -= 8
                reasoning_parts.append("No property-related documents detected")

            if property_related_docs >= 2:
                trust_score += 8
                reasoning_parts.append("Multiple supporting documents provided")

        # Description length. A long description earns nothing once the
        # listing has been flagged as fake, but is not penalised either.
        text_length = len(text)
        if text_length > 200 and not fake_detected:
            trust_score += 15
            reasoning_parts.append("Detailed property description provided")
        elif text_length > 100 and not fake_detected:
            trust_score += 10
            reasoning_parts.append("Adequate property description provided")
        elif text_length < 50:
            trust_score -= 10
            reasoning_parts.append("Very short property description")

        if not fake_detected and any(city in text_lower for city in _MAJOR_CITIES):
            trust_score += 8
            reasoning_parts.append("Property in major city")

        if not fake_detected and any(kind in text_lower for kind in _PROPERTY_TYPES):
            trust_score += 6
            reasoning_parts.append("Clear property type mentioned")

        amenities_count = sum(1 for amenity in _AMENITIES if amenity in text_lower)
        if amenities_count > 0 and not fake_detected:
            trust_score += min(12, amenities_count * 3)
            reasoning_parts.append(f"Property has {amenities_count} amenities mentioned")

        # NOTE(review): this only checks that the field *names* occur in the
        # text and that some 1-2 digit number occurs anywhere; it does not tie
        # a number to a field. Behaviour is kept as-is; confirm the intended
        # text format before tightening it.
        if _SHORT_NUMBER_RE.search(text_lower):
            numeric_count = sum(1 for field in _NUMERIC_FIELDS if field in text_lower)
        else:
            numeric_count = 0

        if numeric_count >= 4:
            fake_detected = True
            trust_score -= 25
            reasoning_parts.append("Multiple fields contain only numbers (highly suspicious)")

        # Listings that came with any image or document never drop below 20.
        if trust_score < 20 and (image_analysis or pdf_analysis):
            trust_score = 20.0

        trust_score = max(0.0, min(100.0, trust_score))

        if reasoning_parts:
            reasoning = ". ".join(reasoning_parts) + "."
        else:
            reasoning = "Basic trust assessment completed."

        return trust_score, reasoning

    except Exception as e:
        # Deliberate best-effort boundary: callers always get a score back.
        logger.error(f"Error in trust score generation: {str(e)}")
        return 35.0, f"Trust analysis failed: {str(e)}"