Spaces:
Sleeping
Sleeping
File size: 7,538 Bytes
599c2c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
"""
Validate the quality of generated training examples
"""
import json
import re
from typing import List, Dict, Tuple
def analyze_training_examples(filepath: str) -> Dict:
    """Analyze the quality and characteristics of training examples.

    The file is parsed as JSON and iterated as a collection of examples.
    Only examples that have a ``'messages'`` entry with at least three items
    are inspected; the ``'content'`` of the third message is treated as the
    article, and its first ``'\\n\\n'``-separated segment (minus a leading
    ``'# '``) as the title. Other examples are skipped but still count
    toward ``total_examples`` and toward the denominators below.

    Args:
        filepath: Path to a UTF-8 encoded JSON file of examples.

    Returns:
        A dict with the keys:
            ``total_examples``: number of examples in the file.
            ``provocative_titles``, ``cynical_phrases``, ``technical_content``,
            ``negative_analogies``: percentage of examples exhibiting each
            style element.
            ``avg_article_length``: total article characters floor-divided by
            the number of examples.
            ``style_consistency``: percentage of the maximum possible style
            score (four elements per example).
            ``sample_titles``: up to the first ten titles encountered.
        For an empty collection all numeric values stay ``0``.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        examples = json.load(f)

    analysis = {
        'total_examples': len(examples),
        'provocative_titles': 0,
        'cynical_phrases': 0,
        'technical_content': 0,
        'negative_analogies': 0,
        'avg_article_length': 0,
        'style_consistency': 0,
        'sample_titles': []
    }

    # Style indicators
    provocative_words = [
        'disaster', 'catastrophe', 'crash', 'burn', 'fail', 'collapse', 'meltdown',
        'nightmare', 'fiasco', 'debacle', 'train wreck', 'explosion', 'implosion'
    ]
    cynical_phrases = [
        'of course', 'naturally', 'predictably', 'unsurprisingly', 'evidently',
        'clearly', 'obviously', 'needless to say'
    ]
    negative_analogies = [
        'train wreck', 'collision', 'explosion', 'disaster', 'catastrophe',
        'meltdown', 'implosion', 'crash', 'carnival barker', 'unicorn'
    ]
    technical_terms = [
        '5G', 'RAN', 'AI', 'edge computing', 'automation', 'cloud', 'network',
        'operator', 'vendor', 'infrastructure', 'deployment', 'integration'
    ]

    # Acronyms (the all-uppercase entries) must match as whole words: a plain
    # substring test finds "ai" in "said"/"fail"/"train" and "ran" in "brand",
    # which would flag practically every article as technical. Matching stays
    # case-insensitive, like the other checks.
    acronym_pattern = re.compile(
        r'\b(?:'
        + '|'.join(re.escape(term.lower()) for term in technical_terms if term.isupper())
        + r')\b'
    )
    # Longer terms keep substring matching so plurals ("networks") still count.
    plain_terms = [term.lower() for term in technical_terms if not term.isupper()]

    total_length = 0
    style_score = 0

    for example in examples:
        if 'messages' not in example or len(example['messages']) < 3:
            continue

        content = example['messages'][2]['content']
        title_line = content.split('\n\n')[0]
        title = title_line[2:] if title_line.startswith('# ') else title_line

        # Collect sample titles
        if len(analysis['sample_titles']) < 10:
            analysis['sample_titles'].append(title)

        title_lower = title.lower()
        content_lower = content.lower()

        # Evaluate each style element once; the same results feed both the
        # per-element counters and the 0-4 style consistency score.
        checks = {
            'provocative_titles': any(word in title_lower for word in provocative_words),
            'cynical_phrases': any(phrase in content_lower for phrase in cynical_phrases),
            'technical_content': (
                any(term in content_lower for term in plain_terms)
                or acronym_pattern.search(content_lower) is not None
            ),
            'negative_analogies': any(analogy in content_lower for analogy in negative_analogies),
        }
        for key, present in checks.items():
            if present:
                analysis[key] += 1

        total_length += len(content)
        style_score += sum(checks.values())

    # Calculate averages and percentages. The denominator is the number of
    # examples in the file, including any that were skipped above.
    if examples:
        analysis['avg_article_length'] = total_length // len(examples)
        analysis['style_consistency'] = (style_score / (len(examples) * 4)) * 100

        # Convert counts to percentages
        for key in ('provocative_titles', 'cynical_phrases',
                    'technical_content', 'negative_analogies'):
            analysis[key] = (analysis[key] / len(examples)) * 100

    return analysis
def print_analysis_report(analysis: Dict):
    """Write a formatted quality report for ``analysis`` to stdout.

    Args:
        analysis: Mapping that provides ``total_examples``,
            ``avg_article_length``, ``provocative_titles``,
            ``cynical_phrases``, ``technical_content``,
            ``negative_analogies``, ``style_consistency`` and
            ``sample_titles``.

    The report has a header, the per-element style percentages, a numbered
    list of sample titles, and a verdict derived from the mean of the four
    style-element percentages.
    """
    banner = "=" * 60
    print(banner)
    print("TRAINING EXAMPLES QUALITY ANALYSIS")
    print(banner)

    print(f"Total Examples: {analysis['total_examples']}")
    print(f"Average Article Length: {analysis['avg_article_length']:,} characters")
    print()

    # (label, key) pairs, in display order
    element_rows = (
        ("Provocative Titles", 'provocative_titles'),
        ("Cynical Phrases", 'cynical_phrases'),
        ("Technical Content", 'technical_content'),
        ("Negative Analogies", 'negative_analogies'),
    )

    print("STYLE ANALYSIS:")
    for label, key in element_rows:
        print(f" {label}: {analysis[key]:.1f}%")
    print(f" Overall Style Consistency: {analysis['style_consistency']:.1f}%")
    print()

    print("SAMPLE TITLES:")
    position = 0
    for sample in analysis['sample_titles']:
        position += 1
        print(f" {position:2d}. {sample}")
    print()

    # Quality assessment: plain average of the four element percentages
    quality_score = sum(analysis[key] for _, key in element_rows) / 4

    # Thresholds are checked from highest to lowest; the first one met wins
    verdict_ladder = (
        (80, " ✅ EXCELLENT - High-quality examples with strong style consistency"),
        (60, " ✅ GOOD - Solid examples with good style elements"),
        (40, " ⚠️ FAIR - Acceptable but could use improvement"),
    )
    verdict = next(
        (text for floor, text in verdict_ladder if quality_score >= floor),
        " ❌ POOR - Needs significant improvement",
    )

    print("QUALITY ASSESSMENT:")
    print(verdict)
    print(f" Overall Quality Score: {quality_score:.1f}%")
    print()
def compare_datasets(original_file: str, new_file: str):
    """Print a side-by-side comparison of two example datasets.

    Both files are run through ``analyze_training_examples``; the report
    shows each dataset's example count, the difference between the counts,
    and whether the style consistency of ``new_file`` is at least that of
    ``original_file``.

    Args:
        original_file: Path of the baseline dataset.
        new_file: Path of the expanded dataset.
    """
    print("DATASET COMPARISON:")
    print("-" * 40)

    baseline = analyze_training_examples(original_file)
    expanded = analyze_training_examples(new_file)

    baseline_count = baseline['total_examples']
    expanded_count = expanded['total_examples']
    print(f"Original Dataset: {baseline_count} examples")
    print(f"Expanded Dataset: {expanded_count} examples")
    print(f"New Examples Added: {expanded_count - baseline_count}")
    print()

    baseline_style = baseline['style_consistency']
    expanded_style = expanded['style_consistency']
    print("STYLE CONSISTENCY COMPARISON:")
    print(f" Original: {baseline_style:.1f}%")
    print(f" Expanded: {expanded_style:.1f}%")

    outcome = (
        " ✅ Style consistency maintained or improved"
        if expanded_style >= baseline_style
        else " ⚠️ Style consistency decreased"
    )
    print(outcome)
    print()
def main():
    """Run the full validation: two dataset reports, then a comparison.

    The new-examples file and the expanded dataset are each analyzed and
    reported. The expanded dataset is then compared with the original one;
    a ``FileNotFoundError`` raised during that comparison is reported as a
    message instead of propagating.
    """
    print("Validating training examples quality...\n")

    # (heading, dataset path) pairs, reported in this order
    report_targets = (
        ("ANALYZING NEW EXAMPLES:", 'data/additional_training_examples.json'),
        ("ANALYZING EXPANDED DATASET:", 'data/expanded_train_dataset.json'),
    )
    for heading, dataset_path in report_targets:
        print(heading)
        report = analyze_training_examples(dataset_path)
        print_analysis_report(report)

    # Compare with original
    try:
        compare_datasets('data/train_dataset.json', 'data/expanded_train_dataset.json')
    except FileNotFoundError:
        print("Original dataset not found for comparison.")

    rule = "=" * 60
    print(rule)
    print("VALIDATION COMPLETE")
    print(rule)
# Run the validation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|