# models/text_quality.py

from .model_loader import load_model
from .logging_config import logger

def assess_text_quality(text):
    """Assess the quality of a free-text listing description.

    Returns a dict with an assessment label, a 0-100 score, reasoning,
    a crude AI-generation flag, and supporting quality metrics.
    """
    try:
        # Handle very short or empty text with more reasonable scoring
        if not text or len(str(text).strip()) < 5:
            return {
                'assessment': 'insufficient',
                'score': 5,  # Give minimum score instead of 0
                'reasoning': 'Text too short or empty.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'model_used': 'static_fallback'
            }
        
        # Normalise to a plain string once so the length and word metrics
        # below cannot fail on non-string input
        text = str(text)

        # For very short text (5-20 characters), give a basic score
        if len(text.strip()) < 20:
            return {
                'assessment': 'basic',
                'score': 15,  # Basic score for minimal text
                'reasoning': 'Very short text provided.',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': len(text),
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        
        try:
            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            # Much more lenient fallback scoring for when model fails
            text_length = len(text)
            if text_length > 200:
                fallback_score = 70  # Increased from 60
                assessment = 'good'
            elif text_length > 100:
                fallback_score = 50  # Increased from 40
                assessment = 'adequate'
            elif text_length > 50:
                fallback_score = 35  # Increased from 25
                assessment = 'basic'
            else:
                fallback_score = 25  # Increased from 15
                assessment = 'basic'
            
            return {
                'assessment': assessment,
                'score': fallback_score,
                'reasoning': f'Model loading error, using fallback scoring based on text length ({text_length} chars).',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': text_length,
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }

        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Analyze text with multiple aspects
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)
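        # Assuming load_model returns a standard Hugging Face
        # zero-shot-classification pipeline, quality_result is a dict of the
        # form {'sequence': ..., 'labels': [...], 'scores': [...]} with the
        # labels sorted by descending score.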

        # Get top classifications with confidence scores
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:  # Only include if confidence is above 30%
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # Calculate overall quality score
        positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
        negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]
        
        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores']) 
                           if label in positive_categories)
        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores']) 
                           if label in negative_categories)
        
        # Calculate final score (0-100) with better handling of edge cases
        base_score = (positive_score - negative_score + 1) * 50
        quality_score = max(20, min(100, int(base_score)))  # Increased minimum from 10% to 20%
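        # Worked example (hypothetical numbers): positive_score = 1.2 and
        # negative_score = 0.4 give base_score = (1.2 - 0.4 + 1) * 50 = 90,
        # so quality_score = 90; a heavily negative result such as
        # positive_score = 0.1, negative_score = 1.5 gives base_score = -20,
        # which clamps up to the floor of 20.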
        
        # Much more lenient assessment thresholds
        if quality_score >= 70:  # Reduced from 80
            assessment = 'excellent'
        elif quality_score >= 50:  # Reduced from 60
            assessment = 'good'
        elif quality_score >= 30:  # Reduced from 40
            assessment = 'adequate'
        else:
            # quality_score is clamped to at least 20, so everything else is 'basic'
            assessment = 'basic'

        # Simple AI detection (basic heuristic): flag long text that either
        # stacks stock superlatives or is unusually long and sentence-dense
        is_ai_generated = len(text) > 500 and (
            ('beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower())
            or (text.count('.') > 10 and len(text.split()) > 100)
        )

        return {
            'assessment': assessment,
            'score': quality_score,
            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
            'is_ai_generated': is_ai_generated,
            'quality_metrics': {
                'text_length': len(text),
                'word_count': len(text.split()),
                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
                'positive_score': positive_score,
                'negative_score': negative_score
            },
            'top_classifications': top_classifications,
            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
        }

    except Exception as e:
        logger.error(f"Error in text quality assessment: {str(e)}")
        # Return much more reasonable fallback instead of 0
        text_length = len(str(text)) if text else 0
        fallback_score = max(25, min(60, text_length // 2 + 20))  # Much more lenient scoring based on length
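        # Example: a 100-character description yields
        # max(25, min(60, 100 // 2 + 20)) = min(60, 70) = 60.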
        
        return {
            'assessment': 'basic',
            'score': fallback_score,
            'reasoning': f'Text quality assessment failed: {str(e)}. Using fallback scoring.',
            'is_ai_generated': False,
            'quality_metrics': {
                'text_length': text_length,
                'word_count': len(str(text).split()) if text else 0,
                'sentence_count': str(text).count('.') + str(text).count('!') + str(text).count('?') if text else 0
            },
            'model_used': 'error_fallback'
        }
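

# Minimal usage sketch: run as a module from the project root (for example
# `python -m models.text_quality`) so the relative imports above resolve.
# The sample text below is purely illustrative.
if __name__ == "__main__":
    sample = (
        "Spacious two-bedroom apartment close to public transport, recently "
        "renovated with a modern kitchen and plenty of natural light."
    )
    result = assess_text_quality(sample)
    print(f"{result['assessment']} ({result['score']}/100)")
    print(result['reasoning'])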