Spaces:
Sleeping
Sleeping
"""
Update system prompts in training data to use the improved version from the guide
"""
import json
import logging

# Set up logging: configure the root logger at INFO level so the progress
# messages emitted by this script are shown, and create the module logger
# that the rest of the file logs through.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _load_examples(path):
    """Read a JSON dataset from *path*, decoding it explicitly as UTF-8."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _save_examples(examples, path):
    """Write *examples* to *path* as indented JSON."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(examples, f, indent=2)


def _replace_system_prompt(examples, prompt):
    """Overwrite the first system message of each example in place; return how many were changed."""
    updated = 0
    for example in examples:
        # Only the first system message is rewritten; examples without one
        # are left untouched and are not counted.
        system_message = next(
            (message for message in example['messages'] if message['role'] == 'system'),
            None,
        )
        if system_message is not None:
            system_message['content'] = prompt
            updated += 1
    return updated


def update_system_prompts(
    train_path='data/expanded_train_dataset.json',
    train_out_path='data/improved_train_dataset.json',
    val_path='data/val_dataset.json',
    val_out_path='data/improved_val_dataset.json',
):
    """Update system prompts in the expanded training dataset.

    Loads the training and validation datasets, replaces the content of the
    first ``system`` message in every example with the improved prompt, and
    writes the results to new files (the inputs are not modified on disk).

    Each dataset is expected to be a JSON list of examples, where every
    example has a ``messages`` list of dicts carrying a ``role`` key.

    Args:
        train_path: Training dataset to read.
        train_out_path: Where the updated training dataset is written.
        val_path: Validation dataset to read.
        val_out_path: Where the updated validation dataset is written.

    Returns:
        A tuple ``(updated_count, val_updated_count)`` with the number of
        training and validation examples whose system prompt was replaced.

    Raises:
        OSError: If a dataset cannot be read or written.
        json.JSONDecodeError: If an input file is not valid JSON.
        KeyError: If an example lacks ``messages`` or a message lacks ``role``.
    """
    # Improved system prompt from the guide.
    # NOTE(review): the lines are joined with single newlines, exactly as they
    # appear in this file; confirm against the guide whether blank lines are
    # expected between the sections.
    improved_system_prompt = "\n".join((
        "You are Iain Morris, a razor-sharp British writer with zero tolerance for BS. Your writing style is distinctive for:",
        "PROVOCATIVE DOOM-LADEN OPENINGS:",
        "- Always lead with conflict, failure, or impending disaster",
        "- Use visceral, dramatic scenarios that grab readers by the throat",
        "- Frame mundane topics as battles, collisions, or catastrophes",
        "- Open with vivid imagery that establishes immediate tension",
        "SIGNATURE DARK ANALOGIES:",
        "- Compare situations to train wrecks, explosions, collisions",
        "- Use physical, visceral metaphors for abstract problems",
        "- Reference pop culture disasters and failures",
        "- Turn simple concepts into dramatic, often dark imagery",
        "CYNICAL WIT & EXPERTISE:",
        "- Deliver insights with biting sarcasm and parenthetical snark",
        "- Assume readers are intelligent but skeptical",
        "- Quote figures, then immediately undercut them",
        "- Use technical knowledge as a weapon of wit",
        "DISTINCTIVE PHRASES:",
        '- "What could possibly go wrong?"',
        '- "kiss of death," "train wreck," "collision course"',
        "- Parenthetical asides for extra snark",
        "- British expressions and dry humor",
        "Write with the assumption that everything is either failing, about to fail, or succeeding despite obvious flaws.",
    ))

    # Load the expanded training dataset
    logger.info("Loading expanded training dataset...")
    training_data = _load_examples(train_path)
    logger.info("Loaded %d training examples", len(training_data))

    # Update system prompts
    updated_count = _replace_system_prompt(training_data, improved_system_prompt)
    logger.info("Updated %d system prompts", updated_count)

    # Save the updated dataset
    _save_examples(training_data, train_out_path)
    logger.info("Saved improved training dataset to %s", train_out_path)

    # Also update validation dataset
    logger.info("Updating validation dataset...")
    val_data = _load_examples(val_path)
    val_updated_count = _replace_system_prompt(val_data, improved_system_prompt)
    logger.info("Updated %d validation system prompts", val_updated_count)

    # Save the updated validation dataset
    _save_examples(val_data, val_out_path)
    logger.info("Saved improved validation dataset to %s", val_out_path)

    return updated_count, val_updated_count
if __name__ == "__main__":
    # Script entry point: rewrite both datasets using the default paths and
    # report how many examples had their system prompt replaced.
    train_count, val_count = update_system_prompts()
    print(f"Successfully updated {train_count} training examples and {val_count} validation examples")