# models/fraud_classification.py

from .model_loader import load_model
from .logging_config import logger
import re

def classify_fraud(property_details, description):
    """
    Classify the fraud risk of a property listing using AI.
    """
    try:
        # Combine property details and description for analysis
        text_to_analyze = f"{property_details} {description}"
        
        # CRITICAL: Check for obvious fake data patterns first - Much more lenient
        fake_patterns = [
            r'^\d+$',      # Only numbers (very strict)
            r'price.*\d{1,2}',  # Very low prices (more lenient)
            r'size.*\d{1,2}',   # Very small sizes (more lenient)
        ]
        
        fake_detected = False
        for pattern in fake_patterns:
            if re.search(pattern, text_to_analyze.lower()):
                # Only mark as fake if it's extremely obvious
                if pattern == r'^\d+$' and len(text_to_analyze.strip()) <= 3:
                    fake_detected = True
                    break
                # For other patterns, be more lenient
                elif pattern in [r'price.*\d{1,2}', r'size.*\d{1,2}']:
                    # Only mark as fake if multiple patterns are found
                    continue
        
        # Check for repeated numbers (like "2, 2, 2, 2") - Much more lenient
        numbers = re.findall(r'\b\d+\b', text_to_analyze.lower())
        if len(numbers) >= 5:  # Increased threshold from 3 to 5
            unique_numbers = set(numbers)
            if len(unique_numbers) <= 1:  # Only if ALL numbers are the same
                fake_detected = True
        
        # Check for extremely low values - Much more lenient
        if any(word in text_to_analyze.lower() for word in ['₹1', '₹2']):  # Only extremely low values
            fake_detected = True
        
        # Check for very small property sizes - Much more lenient
        if any(word in text_to_analyze.lower() for word in ['1 sq ft', '2 sq ft']):  # Only extremely small
            fake_detected = True
        
        # If fake data is detected, return moderate fraud score instead of high
        if fake_detected:
            return {
                'alert_level': 'medium',  # Changed from 'high' to 'medium'
                'alert_score': 0.6,  # Reduced from 0.9 to 0.6
                'confidence_scores': {
                    'high risk listing': 0.6,  # Reduced from 0.9
                    'potential fraud': 0.5,    # Reduced from 0.8
                    'suspicious listing': 0.4,  # Reduced from 0.7
                    'legitimate listing': 0.2   # Increased from 0.1
                },
                'high_risk': ['Fake data patterns detected'],
                'medium_risk': [],
                'low_risk': [],
                'reasoning': 'This property was classified as medium risk due to detected fake data patterns.'
            }
        
        # Use a more lenient classification approach for legitimate-looking data
        classifier = load_model("zero-shot-classification", "facebook/bart-large-mnli")
        
        # More balanced risk categories
        risk_categories = [
            "legitimate listing",
            "suspicious listing", 
            "potential fraud",
            "high risk listing"
        ]
        
        # Classify the text
        result = classifier(text_to_analyze[:1000], risk_categories, multi_label=False)
        
        fraud_classification = {
            'alert_level': 'minimal',
            'alert_score': 0.0,
            'confidence_scores': {},
            'high_risk': [],
            'medium_risk': [],
            'low_risk': [],
            'reasoning': ''
        }
        
        # Process classification results - Much more lenient for legitimate data
        fraud_score = 0.0
        if isinstance(result, dict) and 'scores' in result:
            for label, score in zip(result.get('labels', []), result.get('scores', [])):
                if label != "legitimate listing":
                    try:
                        score_val = float(score)
                        # Much more lenient reduction of suspicious classifications
                        if label == "suspicious listing":
                            score_val *= 0.3  # Reduced from 0.5 to 0.3
                        elif label == "potential fraud":
                            score_val *= 0.5  # Reduced from 0.7 to 0.5
                        elif label == "high risk listing":
                            score_val *= 0.6  # Reduced from 0.8 to 0.6
                    except Exception:
                        score_val = 0.0
                    fraud_score += score_val
                    fraud_classification['confidence_scores'][label] = score_val
        else:
            # Handle fallback result
            fraud_score = 0.02  # Reduced from 0.05 to 0.02

        # Normalize fraud score to 0-1 range with much more lenient scaling
        try:
            fraud_score = min(1.0, fraud_score / (len(risk_categories) - 1) * 0.5)  # Reduced by 50%
        except Exception:
            fraud_score = 0.0
        fraud_classification['alert_score'] = fraud_score
        
        # Determine alert level with much more lenient thresholds
        if fraud_score >= 0.8:  # Increased from 0.7
            fraud_classification['alert_level'] = 'high'
        elif fraud_score >= 0.5:  # Increased from 0.4
            fraud_classification['alert_level'] = 'medium'
        elif fraud_score >= 0.3:  # Increased from 0.2
            fraud_classification['alert_level'] = 'low'
        else:
            fraud_classification['alert_level'] = 'minimal'
        
        # Generate reasoning based on scores
        reasoning_parts = []
        
        if fraud_score < 0.3:
            reasoning_parts.append("This property was classified as legitimate based on AI analysis of the listing details.")
        elif fraud_score < 0.5:
            reasoning_parts.append("This property was classified as low risk based on AI analysis of the listing details.")
        elif fraud_score < 0.8:
            reasoning_parts.append("This property was classified as medium risk based on AI analysis of the listing details.")
        else:
            reasoning_parts.append("This property was classified as high risk based on AI analysis of the listing details.")
        
        # Add specific risk indicators if any
        if fraud_classification['confidence_scores']:
            highest_risk = max(fraud_classification['confidence_scores'].items(), key=lambda x: x[1])
            if highest_risk[1] > 0.4:  # Increased threshold from 0.3 to 0.4
                reasoning_parts.append(f"Primary concern: {highest_risk[0]} (confidence: {highest_risk[1]:.0%})")
        
        fraud_classification['reasoning'] = " ".join(reasoning_parts)
        
        return fraud_classification
        
    except Exception as e:
        logger.error(f"Error in fraud classification: {str(e)}")
        return {
            'alert_level': 'minimal',
            'alert_score': 0.02,  # Reduced from 0.05 to 0.02
            'confidence_scores': {},
            'high_risk': [],
            'medium_risk': [],
            'low_risk': [],
            'reasoning': f'Fraud analysis failed: {str(e)}'
        }

def simple_fraud_classification(text, categories):
    """
    Simple keyword-based fraud classification fallback.
    """
    text_lower = text.lower()
    
    # Define keywords for each category
    category_keywords = {
        "fraudulent listing": ["fake", "scam", "fraud", "illegal", "unauthorized"],
        "misleading information": ["misleading", "false", "wrong", "incorrect", "fake"],
        "fake property": ["fake", "non-existent", "virtual", "photoshopped"],
        "scam attempt": ["scam", "fraud", "cheat", "trick", "deceive"],
        "legitimate listing": ["real", "genuine", "authentic", "verified", "legitimate"]
    }
    
    scores = []
    for category in categories:
        keywords = category_keywords.get(category, [])
        score = sum(1 for keyword in keywords if keyword in text_lower) / len(keywords) if keywords else 0.1
        scores.append(min(1.0, score))
    
    return {
        "labels": categories,
        "scores": scores
    }