# models/text_quality.py
from .model_loader import load_model
from .logging_config import logger


def assess_text_quality(text):
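    """Assess description text quality with a zero-shot classifier.

    Returns a dict with an 'assessment' label, a 'score' on a roughly 0-100 scale,
    'reasoning', an 'is_ai_generated' flag, 'quality_metrics', and 'model_used'.
    Falls back to length-based scoring when the classifier cannot be loaded.
    """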
    try:
        # Handle very short or empty text with more reasonable scoring
        if not text or len(str(text).strip()) < 5:
            return {
                'assessment': 'insufficient',
                'score': 5,  # Give minimum score instead of 0
                'reasoning': 'Text too short or empty.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'model_used': 'static_fallback'
            }
        # Normalise to a string so downstream length/word/sentence metrics are safe
        text = str(text)

        # For very short text (5-20 characters), give basic score
        if len(text.strip()) < 20:
            return {
                'assessment': 'basic',
                'score': 15,  # Basic score for minimal text
                'reasoning': 'Very short text provided.',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': len(text),
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        try:
            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            # Much more lenient fallback scoring for when model fails
            text_length = len(text)
            if text_length > 200:
                fallback_score = 70  # Increased from 60
                assessment = 'good'
            elif text_length > 100:
                fallback_score = 50  # Increased from 40
                assessment = 'adequate'
            elif text_length > 50:
                fallback_score = 35  # Increased from 25
                assessment = 'basic'
            else:
                fallback_score = 25  # Increased from 15
                assessment = 'basic'
            return {
                'assessment': assessment,
                'score': fallback_score,
                'reasoning': f'Model loading error, using fallback scoring based on text length ({text_length} chars).',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': text_length,
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Analyze text with multiple aspects
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)

        # Get top classifications with confidence scores
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:  # Only include if confidence is above 30%
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })
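
        # Note (assumption): this relies on the usual zero-shot-classification output
        # shape, i.e. a dict with parallel 'labels' and 'scores' lists sorted by
        # descending score, so the first three entries are the top candidates.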
        # Calculate overall quality score
        positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
        negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]
        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in positive_categories)
        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores'])
                             if label in negative_categories)

        # Calculate final score (0-100) with better handling of edge cases
        base_score = (positive_score - negative_score + 1) * 50
        quality_score = max(20, min(100, int(base_score)))  # Increased minimum from 10% to 20%
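        # Worked example (illustrative): if the positive labels sum to 0.9 and the
        # negative labels sum to 0.3, base_score = (0.9 - 0.3 + 1) * 50 = 80, which
        # already lies inside the 20-100 clamp, so quality_score = 80.
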
        # Much more lenient assessment thresholds
        if quality_score >= 70:  # Reduced from 80
            assessment = 'excellent'
        elif quality_score >= 50:  # Reduced from 60
            assessment = 'good'
        elif quality_score >= 30:  # Reduced from 40
            assessment = 'adequate'
        else:
            assessment = 'basic'  # Score is clamped to a minimum of 20, so 'basic' is the floor
        # Simple AI detection (basic heuristic)
        is_ai_generated = len(text) > 500 and (
            ('beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower())
            or (text.count('.') > 10 and len(text.split()) > 100)
        )
        return {
            'assessment': assessment,
            'score': quality_score,
            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
            'is_ai_generated': is_ai_generated,
            'quality_metrics': {
                'text_length': len(text),
                'word_count': len(text.split()),
                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
                'positive_score': positive_score,
                'negative_score': negative_score
            },
            'top_classifications': top_classifications,
            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
        }
    except Exception as e:
        logger.error(f"Error in text quality assessment: {str(e)}")
        # Return much more reasonable fallback instead of 0
        text_length = len(str(text)) if text else 0
        fallback_score = max(25, min(60, text_length // 2 + 20))  # Much more lenient scoring based on length
        return {
            'assessment': 'basic',
            'score': fallback_score,
            'reasoning': f'Text quality assessment failed: {str(e)}. Using fallback scoring.',
            'is_ai_generated': False,
            'quality_metrics': {
                'text_length': text_length,
                'word_count': len(str(text).split()) if text else 0,
                'sentence_count': (str(text).count('.') + str(text).count('!') + str(text).count('?')) if text else 0
            },
            'model_used': 'error_fallback'
        }
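

# Illustrative usage sketch (assumption: the package layout allows running this module
# with `python -m models.text_quality` from the project root; the sample text below is
# made up for demonstration).
if __name__ == "__main__":
    sample = (
        "Bright two-bedroom apartment on a quiet street, recently renovated, with a "
        "fitted kitchen, a small balcony, and quick access to public transport."
    )
    result = assess_text_quality(sample)
    print(result['assessment'], result['score'])
    print(result['reasoning'])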