# models/trust_score.py

from .model_loader import load_model
from .logging_config import logger
import re

# Text made up solely of digits (``$`` also tolerates one trailing newline).
_DIGITS_ONLY_RE = re.compile(r'^\d+$')
# Any standalone run of digits, used to spot "2, 2, 2, 2, 2"-style filler.
_STANDALONE_NUMBER_RE = re.compile(r'\b\d+\b')
# A standalone one- or two-digit number.
_SHORT_NUMBER_RE = re.compile(r'\b\d{1,2}\b')
# A price of literally ₹1 or ₹2.  The lookaheads reject longer amounts such as
# "₹1,20,00,000" or "₹2.5" and amounts followed by a magnitude word
# ("₹1 crore"), which a plain substring test for "₹1" would wrongly flag.
_TINY_PRICE_RE = re.compile(
    r'₹[12](?![,.]?\d)'
    r'(?!\s*(?:k|lakhs?|lacs?|cr|crores?|thousand|million)\b)'
)
# A size of literally "1 sq" or "2 sq".  The lookbehinds reject sizes that
# merely end in 1 or 2, such as "1201 sq ft" or "1,202 sq ft".
_TINY_SIZE_RE = re.compile(r'(?<!\d)(?<!\d[.,])[12] sq')

_POSITIVE_INDICATORS = (
    'apartment', 'flat', 'house', 'villa', 'bungalow', 'property', 'real estate',
    'bedroom', 'bathroom', 'kitchen', 'living', 'dining', 'balcony', 'parking',
    'amenities', 'facilities', 'security', 'lift', 'gym', 'pool', 'garden',
    'hyderabad', 'mumbai', 'delhi', 'bangalore', 'chennai', 'kolkata', 'pune',
    'verified', 'authentic', 'genuine', 'legitimate', 'original', 'certified',
    'pg', 'hostel', 'office', 'commercial', 'retail', 'warehouse', 'industrial',
)
_NEGATIVE_INDICATORS = (
    'fake', 'fraud', 'scam', 'suspicious', 'doubtful', 'unverified', 'unauthentic',
    'illegal', 'unauthorized', 'forged', 'counterfeit', 'bogus', 'phony',
)
_MAJOR_CITIES = ('hyderabad', 'mumbai', 'delhi', 'bangalore')
_PROPERTY_TYPES = ('apartment', 'flat', 'house', 'villa', 'bungalow', 'pg', 'office')
_AMENITIES = ('pool', 'gym', 'garden', 'parking', 'security', 'lift', 'balcony')
_NUMERIC_FIELDS = ('property_name', 'bedrooms', 'bathrooms', 'sq_ft', 'market_value')


def _count_keywords(text_lower, keywords):
    """Return how many of *keywords* occur as substrings of *text_lower*."""
    return sum(1 for keyword in keywords if keyword in text_lower)


def _count_property_related(analysis):
    """Count analysis records whose ``is_property_related`` value is truthy.

    *analysis* may be a list/tuple of result dicts or a single result dict.
    Entries that are not dicts are counted as not property-related instead
    of raising.
    """
    if isinstance(analysis, (list, tuple)):
        records = analysis
    else:
        # A single result (e.g. one dict) is treated as a one-element batch.
        records = [analysis]
    return sum(
        1 for record in records
        if isinstance(record, dict) and record.get('is_property_related', False)
    )


def generate_trust_score(text, image_analysis, pdf_analysis):
    """Compute a heuristic trust score for a property listing.

    The score starts at 50 and is adjusted by keyword heuristics on the
    listing text and by how many attached images / documents are marked
    ``is_property_related``.  The result is clamped to the range 0-100.

    Args:
        text: Listing description.  ``None`` is treated as an empty string;
            any other non-string value is converted with ``str()``.
        image_analysis: A list of image-analysis dicts (or a single dict);
            only the ``is_property_related`` key is read.  Falsy values
            mean "no images analysed".
        pdf_analysis: Same shape as *image_analysis*, for documents.

    Returns:
        A ``(score, reasoning)`` tuple: *score* is a float in ``[0, 100]``
        and *reasoning* is a sentence-joined summary of the adjustments
        applied.  If an unexpected error occurs, it is logged and
        ``(35.0, "Trust analysis failed: ...")`` is returned.
    """
    try:
        trust_score = 50.0
        reasoning_parts = []

        # Normalise once so every later check works on a real string.
        text_str = '' if text is None else str(text)
        text_lower = text_str.lower()

        # Once set, this suppresses all text-based bonuses below.
        fake_detected = False

        # --- Obvious fake-data patterns -------------------------------------
        # The whole description is just a number of at most three digits.
        if _DIGITS_ONLY_RE.search(text_lower) and len(text_str.strip()) <= 3:
            fake_detected = True
            trust_score -= 10
            reasoning_parts.append("Detected suspicious number patterns")

        # Five or more numbers that are all the same value (e.g. "2, 2, 2, 2, 2").
        numbers = _STANDALONE_NUMBER_RE.findall(text_lower)
        if len(numbers) >= 5 and len(set(numbers)) <= 1:
            fake_detected = True
            trust_score -= 15
            reasoning_parts.append("Detected repeated number patterns (likely fake data)")

        if _TINY_PRICE_RE.search(text_lower):
            fake_detected = True
            trust_score -= 20
            reasoning_parts.append("Detected suspiciously low pricing")

        if _TINY_SIZE_RE.search(text_lower):
            fake_detected = True
            trust_score -= 15
            reasoning_parts.append("Detected suspiciously small property size")

        # --- Keyword indicators ---------------------------------------------
        # NOTE: these are substring matches, so plurals ("bedrooms") count,
        # but so do incidental hits such as "pg" inside "upgrade".
        positive_count = _count_keywords(text_lower, _POSITIVE_INDICATORS)
        negative_count = _count_keywords(text_lower, _NEGATIVE_INDICATORS)

        if positive_count > 0 and not fake_detected:
            trust_score += min(25, positive_count * 4)
            reasoning_parts.append(f"Found {positive_count} positive trust indicators")

        if negative_count > 0:
            trust_score -= min(20, negative_count * 4)
            reasoning_parts.append(f"Found {negative_count} negative trust indicators")

        # --- Image analysis contribution ------------------------------------
        if image_analysis:
            related_images = _count_property_related(image_analysis)
            if related_images > 0:
                trust_score += min(20, related_images * 5)
                reasoning_parts.append(f"Property has {related_images} property-related images")
            else:
                trust_score -= 10
                reasoning_parts.append("No property-related images detected")

            if related_images >= 3:
                trust_score += 12
                reasoning_parts.append("Multiple property images provided")

        # --- PDF analysis contribution --------------------------------------
        if pdf_analysis:
            related_docs = _count_property_related(pdf_analysis)
            if related_docs > 0:
                trust_score += min(20, related_docs * 6)
                reasoning_parts.append(f"Property has {related_docs} property-related documents")
            else:
                trust_score -= 8
                reasoning_parts.append("No property-related documents detected")

            if related_docs >= 2:
                trust_score += 8
                reasoning_parts.append("Multiple supporting documents provided")

        # --- Description length ---------------------------------------------
        text_length = len(text_str)
        if text_length > 200 and not fake_detected:
            trust_score += 15
            reasoning_parts.append("Detailed property description provided")
        elif text_length > 100 and not fake_detected:
            trust_score += 10
            reasoning_parts.append("Adequate property description provided")
        elif text_length < 50:
            trust_score -= 10
            reasoning_parts.append("Very short property description")

        # --- Location, property type and amenities --------------------------
        if not fake_detected and any(city in text_lower for city in _MAJOR_CITIES):
            trust_score += 8
            reasoning_parts.append("Property in major city")

        if not fake_detected and any(kind in text_lower for kind in _PROPERTY_TYPES):
            trust_score += 6
            reasoning_parts.append("Clear property type mentioned")

        amenities_count = _count_keywords(text_lower, _AMENITIES)
        if amenities_count > 0 and not fake_detected:
            trust_score += min(12, amenities_count * 3)
            reasoning_parts.append(f"Property has {amenities_count} amenities mentioned")

        # --- Raw field dumps ------------------------------------------------
        # Counts how many raw field names appear in the text, but only when
        # the text also contains a one- or two-digit number somewhere.
        if _SHORT_NUMBER_RE.search(text_lower):
            numeric_count = _count_keywords(text_lower, _NUMERIC_FIELDS)
        else:
            numeric_count = 0

        if numeric_count >= 4:
            trust_score -= 25
            reasoning_parts.append("Multiple fields contain only numbers (highly suspicious)")

        # Listings that supplied any images or documents never drop below 20.
        if trust_score < 20 and (image_analysis or pdf_analysis):
            trust_score = 20

        # Clamp to 0-100 and always hand back a float.
        trust_score = float(max(0, min(100, trust_score)))

        if reasoning_parts:
            reasoning = ". ".join(reasoning_parts) + "."
        else:
            reasoning = "Basic trust assessment completed."

        return trust_score, reasoning

    except Exception as e:
        # Top-level boundary: scoring must never break the caller, so log the
        # error and return a neutral-low fallback score.
        logger.error(f"Error in trust score generation: {str(e)}")
        return 35.0, f"Trust analysis failed: {str(e)}"