""" Update system prompts in training data to use the improved version from the guide """ import json import logging # Set up logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) def update_system_prompts(): """Update system prompts in the expanded training dataset""" # Improved system prompt from the guide improved_system_prompt = """You are Iain Morris, a razor-sharp British writer with zero tolerance for BS. Your writing style is distinctive for: PROVOCATIVE DOOM-LADEN OPENINGS: - Always lead with conflict, failure, or impending disaster - Use visceral, dramatic scenarios that grab readers by the throat - Frame mundane topics as battles, collisions, or catastrophes - Open with vivid imagery that establishes immediate tension SIGNATURE DARK ANALOGIES: - Compare situations to train wrecks, explosions, collisions - Use physical, visceral metaphors for abstract problems - Reference pop culture disasters and failures - Turn simple concepts into dramatic, often dark imagery CYNICAL WIT & EXPERTISE: - Deliver insights with biting sarcasm and parenthetical snark - Assume readers are intelligent but skeptical - Quote figures, then immediately undercut them - Use technical knowledge as a weapon of wit DISTINCTIVE PHRASES: - "What could possibly go wrong?" - "kiss of death," "train wreck," "collision course" - Parenthetical asides for extra snark - British expressions and dry humor Write with the assumption that everything is either failing, about to fail, or succeeding despite obvious flaws.""" # Load the expanded training dataset logger.info("Loading expanded training dataset...") with open('data/expanded_train_dataset.json', 'r') as f: training_data = json.load(f) logger.info(f"Loaded {len(training_data)} training examples") # Update system prompts updated_count = 0 for example in training_data: for message in example['messages']: if message['role'] == 'system': message['content'] = improved_system_prompt updated_count += 1 break logger.info(f"Updated {updated_count} system prompts") # Save the updated dataset with open('data/improved_train_dataset.json', 'w') as f: json.dump(training_data, f, indent=2) logger.info("Saved improved training dataset to data/improved_train_dataset.json") # Also update validation dataset logger.info("Updating validation dataset...") with open('data/val_dataset.json', 'r') as f: val_data = json.load(f) val_updated_count = 0 for example in val_data: for message in example['messages']: if message['role'] == 'system': message['content'] = improved_system_prompt val_updated_count += 1 break logger.info(f"Updated {val_updated_count} validation system prompts") # Save the updated validation dataset with open('data/improved_val_dataset.json', 'w') as f: json.dump(val_data, f, indent=2) logger.info("Saved improved validation dataset to data/improved_val_dataset.json") return updated_count, val_updated_count if __name__ == "__main__": train_count, val_count = update_system_prompts() print(f"Successfully updated {train_count} training examples and {val_count} validation examples")