# models/text_quality.py
from .model_loader import load_model
from .logging_config import logger


def assess_text_quality(text):
    try:
        # Handle very short or empty text with more reasonable scoring
        if not text or len(str(text).strip()) < 5:
            return {
                'assessment': 'insufficient',
                'score': 5,  # Give a minimum score instead of 0
                'reasoning': 'Text too short or empty.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'model_used': 'static_fallback'
            }

        # For very short text (5-20 characters), give a basic score
        if len(str(text).strip()) < 20:
            return {
                'assessment': 'basic',
                'score': 15,  # Basic score for minimal text
                'reasoning': 'Very short text provided.',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': len(text),
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }

        try:
            classifier = load_model("zero-shot-classification")  # Use the standard model instead of typeform
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            # Much more lenient fallback scoring for when the model fails
            text_length = len(text)
            if text_length > 200:
                fallback_score = 70  # Increased from 60
                assessment = 'good'
            elif text_length > 100:
                fallback_score = 50  # Increased from 40
                assessment = 'adequate'
            elif text_length > 50:
                fallback_score = 35  # Increased from 25
                assessment = 'basic'
            else:
                fallback_score = 25  # Increased from 15
                assessment = 'basic'
            return {
                'assessment': assessment,
                'score': fallback_score,
                'reasoning': f'Model loading error, using fallback scoring based on text length ({text_length} chars).',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': text_length,
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }

        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Analyze the text against multiple aspects
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Keep the top classifications that clear the confidence threshold
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:  # Only include if confidence is above 30%
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # Calculate the overall quality score
        positive_categories = ["detailed and informative", "adequately detailed",
                               "professional listing", "authentic description"]
        negative_categories = ["vague description", "misleading content", "amateur listing",
                               "spam-like content", "template-based content"]

        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in positive_categories)
        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in negative_categories)

        # Calculate the final score (0-100) with better handling of edge cases
        base_score = (positive_score - negative_score + 1) * 50
        quality_score = max(20, min(100, int(base_score)))  # Increased minimum from 10 to 20

        # Much more lenient assessment thresholds
        if quality_score >= 70:  # Reduced from 80
            assessment = 'excellent'
        elif quality_score >= 50:  # Reduced from 60
            assessment = 'good'
        elif quality_score >= 30:  # Reduced from 40
            assessment = 'adequate'
        elif quality_score >= 20:
            assessment = 'basic'
        else:
            assessment = 'basic'  # Changed from 'very poor' to 'basic'

        # Simple AI detection (basic heuristic)
        is_ai_generated = len(text) > 500 and (
            ('beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower())
            or (text.count('.') > 10 and len(text.split()) > 100)
        )

        return {
            'assessment': assessment,
            'score': quality_score,
            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
            'is_ai_generated': is_ai_generated,
            'quality_metrics': {
                'text_length': len(text),
                'word_count': len(text.split()),
                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
                'positive_score': positive_score,
                'negative_score': negative_score
            },
            'top_classifications': top_classifications,
            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
        }
    except Exception as e:
        logger.error(f"Error in text quality assessment: {str(e)}")
        # Return a much more reasonable fallback instead of 0
        text_length = len(str(text)) if text else 0
        fallback_score = max(25, min(60, text_length // 2 + 20))  # More lenient scoring based on length
        return {
            'assessment': 'basic',
            'score': fallback_score,
            'reasoning': f'Text quality assessment failed: {str(e)}. Using fallback scoring.',
            'is_ai_generated': False,
            'quality_metrics': {
                'text_length': text_length,
                'word_count': len(str(text).split()) if text else 0,
                'sentence_count': (str(text).count('.') + str(text).count('!') + str(text).count('?')) if text else 0
            },
            'model_used': 'error_fallback'
        }
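

# Minimal usage sketch (illustrative only): it assumes load_model returns a
# Hugging Face zero-shot-classification pipeline and that this module is run
# as part of the models package. The sample listing text is hypothetical and
# only demonstrates the shape of the returned dict.
if __name__ == "__main__":
    sample_text = (
        "Spacious two-bedroom apartment with a renovated kitchen, hardwood "
        "floors, secure parking, and easy access to public transport."
    )
    result = assess_text_quality(sample_text)
    print(result['assessment'], result['score'])
    print(result['reasoning'])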