# models/text_quality.py

from .model_loader import load_model
from .logging_config import logger

def assess_text_quality(text):
    """Assess the quality of a free-text listing description.

    Returns a dict with an assessment label, a 0-100 score, reasoning,
    a crude AI-generation flag, and supporting quality metrics.
    """
    try:
        # Handle very short or empty text with more reasonable scoring
        if not text or len(str(text).strip()) < 5:
            return {
                'assessment': 'insufficient',
                'score': 5,  # Give minimum score instead of 0
                'reasoning': 'Text too short or empty.',
                'is_ai_generated': False,
                'quality_metrics': {},
                'model_used': 'static_fallback'
            }
        
        # Normalise to a plain string once so the length and word metrics
        # below cannot fail on non-string input
        text = str(text)

        # For very short text (5-20 characters), give a basic score
        if len(text.strip()) < 20:
            return {
                'assessment': 'basic',
                'score': 15,  # Basic score for minimal text
                'reasoning': 'Very short text provided.',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': len(text),
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }
        
        try:
            classifier = load_model("zero-shot-classification")  # Use standard model instead of typeform
        except Exception as e:
            logger.error(f"Error loading model in text quality: {str(e)}")
            # Much more lenient fallback scoring for when model fails
            text_length = len(text)
            if text_length > 200:
                fallback_score = 70  # Increased from 60
                assessment = 'good'
            elif text_length > 100:
                fallback_score = 50  # Increased from 40
                assessment = 'adequate'
            elif text_length > 50:
                fallback_score = 35  # Increased from 25
                assessment = 'basic'
            else:
                fallback_score = 25  # Increased from 15
                assessment = 'basic'
            
            return {
                'assessment': assessment,
                'score': fallback_score,
                'reasoning': f'Model loading error, using fallback scoring based on text length ({text_length} chars).',
                'is_ai_generated': False,
                'quality_metrics': {
                    'text_length': text_length,
                    'word_count': len(text.split()),
                    'sentence_count': text.count('.') + text.count('!') + text.count('?')
                },
                'model_used': 'static_fallback'
            }

        # Enhanced quality categories with more specific indicators
        quality_categories = [
            "detailed and informative",
            "adequately detailed",
            "basic information",
            "vague description",
            "misleading content",
            "professional listing",
            "amateur listing",
            "spam-like content",
            "template-based content",
            "authentic description"
        ]

        # Analyze text with multiple aspects
        quality_result = classifier(text[:1000], quality_categories, multi_label=True)
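        # Assuming load_model returns a standard Hugging Face
        # zero-shot-classification pipeline, quality_result is a dict of the
        # form {'sequence': ..., 'labels': [...], 'scores': [...]} with the
        # labels sorted by descending score.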

        # Get top classifications with confidence scores
        top_classifications = []
        for label, score in zip(quality_result['labels'][:3], quality_result['scores'][:3]):
            if score > 0.3:  # Only include if confidence is above 30%
                top_classifications.append({
                    'classification': label,
                    'confidence': float(score)
                })

        # Calculate overall quality score
        positive_categories = ["detailed and informative", "adequately detailed", "professional listing", "authentic description"]
        negative_categories = ["vague description", "misleading content", "amateur listing", "spam-like content", "template-based content"]
        
        positive_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores']) 
                           if label in positive_categories)
        negative_score = sum(score for label, score in zip(quality_result['labels'], quality_result['scores']) 
                           if label in negative_categories)
        
        # Calculate final score (0-100) with better handling of edge cases
        base_score = (positive_score - negative_score + 1) * 50
        quality_score = max(20, min(100, int(base_score)))  # Increased minimum from 10% to 20%
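        # Worked example (hypothetical numbers): positive_score = 1.2 and
        # negative_score = 0.4 give base_score = (1.2 - 0.4 + 1) * 50 = 90,
        # so quality_score = 90; a heavily negative result such as
        # positive_score = 0.1, negative_score = 1.5 gives base_score = -20,
        # which clamps up to the floor of 20.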
        
        # Much more lenient assessment thresholds
        if quality_score >= 70:  # Reduced from 80
            assessment = 'excellent'
        elif quality_score >= 50:  # Reduced from 60
            assessment = 'good'
        elif quality_score >= 30:  # Reduced from 40
            assessment = 'adequate'
        else:
            # quality_score is clamped to at least 20, so everything else is 'basic'
            assessment = 'basic'

        # Simple AI detection (basic heuristic): flag long text that either
        # stacks stock superlatives or is unusually long and sentence-dense
        is_ai_generated = len(text) > 500 and (
            ('beautiful' in text.lower() and 'excellent' in text.lower() and 'prime' in text.lower())
            or (text.count('.') > 10 and len(text.split()) > 100)
        )

        return {
            'assessment': assessment,
            'score': quality_score,
            'reasoning': f'Quality score: {quality_score}/100 based on {len(top_classifications)} classifications.',
            'is_ai_generated': is_ai_generated,
            'quality_metrics': {
                'text_length': len(text),
                'word_count': len(text.split()),
                'sentence_count': text.count('.') + text.count('!') + text.count('?'),
                'positive_score': positive_score,
                'negative_score': negative_score
            },
            'top_classifications': top_classifications,
            'model_used': getattr(classifier, 'fallback_model', 'primary_model')
        }

    except Exception as e:
        logger.error(f"Error in text quality assessment: {str(e)}")
        # Return much more reasonable fallback instead of 0
        text_length = len(str(text)) if text else 0
        fallback_score = max(25, min(60, text_length // 2 + 20))  # Much more lenient scoring based on length
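        # Example: a 100-character description yields
        # max(25, min(60, 100 // 2 + 20)) = min(60, 70) = 60.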
        
        return {
            'assessment': 'basic',
            'score': fallback_score,
            'reasoning': f'Text quality assessment failed: {str(e)}. Using fallback scoring.',
            'is_ai_generated': False,
            'quality_metrics': {
                'text_length': text_length,
                'word_count': len(str(text).split()) if text else 0,
                'sentence_count': str(text).count('.') + str(text).count('!') + str(text).count('?') if text else 0
            },
            'model_used': 'error_fallback'
        }
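

# Minimal usage sketch: run as a module from the project root (for example
# `python -m models.text_quality`) so the relative imports above resolve.
# The sample text below is purely illustrative.
if __name__ == "__main__":
    sample = (
        "Spacious two-bedroom apartment close to public transport, recently "
        "renovated with a modern kitchen and plenty of natural light."
    )
    result = assess_text_quality(sample)
    print(f"{result['assessment']} ({result['score']}/100)")
    print(result['reasoning'])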