# NOTE: web-viewer header captured with this file (not script content): Spaces status "Sleeping"; file size 3,424 bytes; revision 599c2c0.
"""
Update system prompts in training data to use the improved version from the guide
"""
import json
import logging
# Set up logging: configure the root logger to emit INFO and above, then
# create the module-level logger that update_system_prompts() reports through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Improved system prompt from the guide. Kept at module level so the helpers
# and the entry point share one definition; the text itself is runtime data.
_IMPROVED_SYSTEM_PROMPT = """You are Iain Morris, a razor-sharp British writer with zero tolerance for BS. Your writing style is distinctive for:
PROVOCATIVE DOOM-LADEN OPENINGS:
- Always lead with conflict, failure, or impending disaster
- Use visceral, dramatic scenarios that grab readers by the throat
- Frame mundane topics as battles, collisions, or catastrophes
- Open with vivid imagery that establishes immediate tension
SIGNATURE DARK ANALOGIES:
- Compare situations to train wrecks, explosions, collisions
- Use physical, visceral metaphors for abstract problems
- Reference pop culture disasters and failures
- Turn simple concepts into dramatic, often dark imagery
CYNICAL WIT & EXPERTISE:
- Deliver insights with biting sarcasm and parenthetical snark
- Assume readers are intelligent but skeptical
- Quote figures, then immediately undercut them
- Use technical knowledge as a weapon of wit
DISTINCTIVE PHRASES:
- "What could possibly go wrong?"
- "kiss of death," "train wreck," "collision course"
- Parenthetical asides for extra snark
- British expressions and dry humor
Write with the assumption that everything is either failing, about to fail, or succeeding despite obvious flaws."""


def _read_json(path):
    """Load and return the JSON document stored at *path*."""
    # JSON text is UTF-8; being explicit avoids decoding with the platform's
    # locale encoding (e.g. cp1252 on Windows), which breaks on non-ASCII data.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _write_json(data, path):
    """Serialize *data* to *path* as 2-space-indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


def _replace_system_prompts(examples, prompt):
    """Overwrite the first system message of each example with *prompt*.

    The examples are modified in place. Entries that are not dicts, that have
    no ``messages`` list, or that contain no system message are left untouched
    and are not counted.

    Args:
        examples: Iterable of chat examples, each expected to look like
            ``{"messages": [{"role": ..., "content": ...}, ...]}``.
        prompt: Replacement text for the system message content.

    Returns:
        The number of examples whose system prompt was replaced.
    """
    updated = 0
    for example in examples:
        messages = example.get("messages") if isinstance(example, dict) else None
        if not isinstance(messages, list):
            continue
        for message in messages:
            if isinstance(message, dict) and message.get("role") == "system":
                message["content"] = prompt
                updated += 1
                # Only the first system message of an example is rewritten.
                break
    return updated


def update_system_prompts(
    *,
    train_input_path="data/expanded_train_dataset.json",
    train_output_path="data/improved_train_dataset.json",
    val_input_path="data/val_dataset.json",
    val_output_path="data/improved_val_dataset.json",
):
    """Update system prompts in the expanded training and validation datasets.

    Each dataset is read, the first system message of every example is
    replaced with the improved prompt, and the result is written to a new
    file. The training dataset is fully processed and saved before the
    validation dataset is opened.

    Args:
        train_input_path: JSON file holding the training examples.
        train_output_path: Destination for the updated training examples.
        val_input_path: JSON file holding the validation examples.
        val_output_path: Destination for the updated validation examples.

    Returns:
        A ``(train_updated, val_updated)`` tuple with the number of examples
        whose system prompt was replaced in each dataset.

    Raises:
        OSError: If an input file cannot be read or an output file cannot be
            written.
        json.JSONDecodeError: If an input file does not contain valid JSON.
    """
    # Training dataset
    logger.info("Loading expanded training dataset...")
    training_data = _read_json(train_input_path)
    logger.info("Loaded %d training examples", len(training_data))

    updated_count = _replace_system_prompts(training_data, _IMPROVED_SYSTEM_PROMPT)
    logger.info("Updated %d system prompts", updated_count)

    _write_json(training_data, train_output_path)
    logger.info("Saved improved training dataset to %s", train_output_path)

    # Validation dataset
    logger.info("Updating validation dataset...")
    val_data = _read_json(val_input_path)

    val_updated_count = _replace_system_prompts(val_data, _IMPROVED_SYSTEM_PROMPT)
    logger.info("Updated %d validation system prompts", val_updated_count)

    _write_json(val_data, val_output_path)
    logger.info("Saved improved validation dataset to %s", val_output_path)

    return updated_count, val_updated_count
if __name__ == "__main__":
    # Script entry point: rewrite both datasets, then report per-split totals.
    counts = update_system_prompts()
    train_count, val_count = counts
    print(f"Successfully updated {train_count} training examples and {val_count} validation examples")
|