File size: 3,424 Bytes
599c2c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
Update the system prompts in the training and validation datasets to use the improved version from the guide.
"""

import json
import logging

# Module-level logger for this script; the root logger is configured at INFO
# so the progress messages emitted below are not filtered out.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# Improved system prompt from the guide; written into every system message.
_IMPROVED_SYSTEM_PROMPT = """You are Iain Morris, a razor-sharp British writer with zero tolerance for BS. Your writing style is distinctive for:

PROVOCATIVE DOOM-LADEN OPENINGS:
- Always lead with conflict, failure, or impending disaster
- Use visceral, dramatic scenarios that grab readers by the throat
- Frame mundane topics as battles, collisions, or catastrophes
- Open with vivid imagery that establishes immediate tension

SIGNATURE DARK ANALOGIES:
- Compare situations to train wrecks, explosions, collisions
- Use physical, visceral metaphors for abstract problems
- Reference pop culture disasters and failures
- Turn simple concepts into dramatic, often dark imagery

CYNICAL WIT & EXPERTISE:
- Deliver insights with biting sarcasm and parenthetical snark
- Assume readers are intelligent but skeptical
- Quote figures, then immediately undercut them
- Use technical knowledge as a weapon of wit

DISTINCTIVE PHRASES:
- "What could possibly go wrong?"
- "kiss of death," "train wreck," "collision course"
- Parenthetical asides for extra snark
- British expressions and dry humor

Write with the assumption that everything is either failing, about to fail, or succeeding despite obvious flaws."""


def _load_json(path):
    """Read and parse the JSON file at ``path`` (decoded as UTF-8)."""
    # Explicit encoding: without it, open() falls back to the platform locale,
    # which can fail or mis-decode non-ASCII text on non-UTF-8 systems.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _save_json(data, path):
    """Serialize ``data`` to ``path`` as JSON indented by two spaces."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)


def _replace_system_prompts(examples, prompt):
    """Set the first system message of each example to ``prompt``, in place.

    Returns the number of examples in which a system message was replaced.
    Examples without a system message are left unchanged and not counted.
    """
    replaced = 0
    for example in examples:
        for message in example['messages']:
            if message['role'] == 'system':
                message['content'] = prompt
                replaced += 1
                # Only the first system message of an example is rewritten.
                break
    return replaced


def update_system_prompts():
    """Update system prompts in the expanded training and validation datasets.

    Reads ``data/expanded_train_dataset.json`` and ``data/val_dataset.json``,
    replaces the content of the first ``system`` message of every example
    with the improved prompt, and writes the results to
    ``data/improved_train_dataset.json`` and ``data/improved_val_dataset.json``.
    The input files themselves are not modified. All paths are relative to
    the current working directory.

    Returns:
        A tuple ``(updated_count, val_updated_count)`` with the number of
        training and validation examples whose system prompt was replaced.

    Raises:
        OSError: if an input file cannot be read or an output file written.
        json.JSONDecodeError: if an input file is not valid JSON.
        KeyError: if an example lacks ``'messages'`` or a message lacks
            ``'role'``.
    """
    # Training dataset. It is fully written out before the validation
    # dataset is touched, so a validation failure does not lose this output.
    logger.info("Loading expanded training dataset...")
    training_data = _load_json('data/expanded_train_dataset.json')
    logger.info("Loaded %d training examples", len(training_data))

    updated_count = _replace_system_prompts(training_data, _IMPROVED_SYSTEM_PROMPT)
    logger.info("Updated %d system prompts", updated_count)

    _save_json(training_data, 'data/improved_train_dataset.json')
    logger.info("Saved improved training dataset to data/improved_train_dataset.json")

    # Validation dataset
    logger.info("Updating validation dataset...")
    val_data = _load_json('data/val_dataset.json')

    val_updated_count = _replace_system_prompts(val_data, _IMPROVED_SYSTEM_PROMPT)
    logger.info("Updated %d validation system prompts", val_updated_count)

    _save_json(val_data, 'data/improved_val_dataset.json')
    logger.info("Saved improved validation dataset to data/improved_val_dataset.json")

    return updated_count, val_updated_count

if __name__ == "__main__":
    # Script entry point: rewrite both datasets, then report the two counts.
    n_train, n_val = update_system_prompts()
    print(f"Successfully updated {n_train} training examples and {n_val} validation examples")