""" Generate 100 high-quality training examples for Iain Morris style article generation """ import json import random from typing import List, Dict class TrainingExampleGenerator: def __init__(self): """Initialize the generator with Iain Morris's system prompt and style patterns""" self.system_prompt = """You are Iain Morris, a veteran telecom journalist with a razor-sharp pen and zero tolerance for industry BS. Your writing style is distinctive for: PROVOCATIVE TITLES & OPENINGS: - Always lead with conflict, failure, or impending doom - Use dramatic, negative framing even for mundane topics - Open with vivid scenarios that immediately establish tension - Frame everything as battles, collisions, or disasters waiting to happen SIGNATURE NEGATIVE ANALOGIES: - Compare industry situations to train wrecks, collisions, explosions - Use visceral, physical metaphors for business problems - Reference pop culture disasters and failures - Turn technical concepts into dramatic, often dark imagery WRITING TECHNIQUE: - Cynical, sarcastic commentary on industry players - Technical expertise delivered with biting wit - Assume readers are intelligent but skeptical - Build articles around conflict narratives - Use parenthetical asides for extra snark - Quote industry figures, then immediately undercut them Write compelling telecom news articles that grab readers by the throat from the first sentence and never let go.""" # Provocative title templates and topics self.title_templates = [ "{company} faces {disaster} as {problem} spirals out of control", "{technology} rollout turns into {disaster} for {company}", "{company}'s {strategy} gamble backfires spectacularly", "Why {company}'s {technology} dreams are destined to crash and burn", "{industry_trend} hype masks growing {problem} crisis", "{company} executives fiddle while {problem} burns", "The {technology} bubble is about to burst – and {company} knows it", "{company}'s {strategy} strategy: brilliant or catastrophically stupid?", "{technology} promises crumble as reality bites {company}", "{company} doubles down on {technology} despite mounting evidence of failure" ] self.companies = [ "Verizon", "AT&T", "T-Mobile", "Deutsche Telekom", "Orange", "Vodafone", "BT", "Telefónica", "Nokia", "Ericsson", "Huawei", "Samsung", "Cisco", "Amazon", "Microsoft", "Google", "Meta", "Apple", "Intel", "Nvidia", "Qualcomm", "Rakuten", "SoftBank", "China Mobile", "Reliance Jio", "Dish Network", "Charter", "Comcast", "Liberty Global", "Altice" ] self.technologies = [ "5G", "6G", "Open RAN", "AI", "edge computing", "network slicing", "private networks", "satellite connectivity", "fiber", "cloud-native", "automation", "digital transformation", "IoT", "AR/VR", "metaverse", "quantum networking", "network APIs", "orchestration", "virtualization", "containerization", "microservices", "DevOps", "MLOps", "AIOps" ] self.disasters = [ "train wreck", "meltdown", "implosion", "catastrophe", "fiasco", "disaster", "collapse", "explosion", "crash", "debacle", "nightmare", "bloodbath", "carnage", "apocalypse", "Armageddon", "dumpster fire" ] self.problems = [ "cost overruns", "security breaches", "customer churn", "revenue decline", "technical failures", "regulatory pressure", "competitive threats", "supply chain chaos", "talent exodus", "investor skepticism", "market saturation", "technology obsolescence", "integration nightmares" ] self.strategies = [ "cloud-first", "AI-driven", "open source", "vendor consolidation", "digital transformation", "automation", "edge computing", "sustainability", "customer experience", "cost-cutting", "merger", "acquisition" ] self.industry_trends = [ "AI hype", "5G promises", "Open RAN evangelism", "edge computing buzz", "digital transformation mania", "automation fever", "cloud migration", "sustainability theater", "customer experience obsession" ] def generate_provocative_title(self) -> str: """Generate a provocative title in Iain Morris style""" template = random.choice(self.title_templates) return template.format( company=random.choice(self.companies), technology=random.choice(self.technologies), disaster=random.choice(self.disasters), problem=random.choice(self.problems), strategy=random.choice(self.strategies), industry_trend=random.choice(self.industry_trends) ) def generate_opening_scenarios(self) -> List[str]: """Generate dramatic opening scenarios""" return [ "The boardroom fell silent as the quarterly numbers flashed on screen, each digit a nail in the coffin of another overhyped technology promise.", "Executives shuffled nervously in their seats as the latest customer satisfaction scores painted a picture grimmer than a Victorian funeral parlor.", "The conference call crackled with tension as analysts pressed for answers that nobody in the C-suite seemed willing to provide.", "Behind closed doors at the industry's biggest trade show, whispered conversations revealed the ugly truth that marketing departments desperately wanted to hide.", "The press release was a masterpiece of corporate doublespeak, but the underlying numbers told a story of spectacular failure.", "Industry veterans watched in horror as another promising technology initiative transformed into a cautionary tale of hubris and incompetence.", "The earnings call transcript read like a crime scene report, with each euphemism marking another casualty of misguided strategy.", "Shareholders fled faster than tourists from a natural disaster as the company's latest pivot proved to be another expensive mistake.", "The technology demonstration that was supposed to wow investors instead left them wondering if anyone in charge had the faintest clue about reality.", "Market analysts struggled to find polite ways to describe what could only be characterized as a complete and utter catastrophe." ] def generate_cynical_observations(self) -> List[str]: """Generate cynical observations and commentary""" return [ "Of course, nobody in the industry wants to admit that the emperor has no clothes – there's too much money at stake.", "Naturally, the executives responsible for this debacle will walk away with golden parachutes while customers suffer the consequences.", "Predictably, the company's response was to hire more consultants and launch another 'transformation initiative.'", "Unsurprisingly, the promised cost savings materialized about as quickly as unicorns in a corporate parking lot.", "Evidently, the laws of physics don't apply to marketing departments and their impossible promises.", "Clearly, someone forgot to mention that customers actually expect technology to work as advertised.", "Obviously, the disconnect between boardroom fantasies and operational reality has reached comical proportions.", "Needless to say, the industry's capacity for self-deception continues to astound even the most cynical observers." ] def generate_technical_content(self, topic: str) -> str: """Generate technical content with cynical commentary""" technical_scenarios = { "5G deployment": """The 5G rollout has become a textbook example of how to turn a promising technology into an expensive disappointment. Despite billions in investment, operators are discovering that customers don't actually want to pay premium prices for marginally faster cat videos. The promised industrial applications remain largely theoretical, while the reality of coverage gaps and battery drain issues continues to frustrate users who were sold on revolutionary capabilities that exist mainly in PowerPoint presentations.""", "Open RAN adoption": """Open RAN was supposed to liberate operators from vendor lock-in and usher in an era of innovation and cost savings. Instead, it's become a complex integration nightmare that makes traditional RAN deployments look simple by comparison. The promised ecosystem of innovative suppliers has largely failed to materialize, leaving operators with the choice between established vendors offering 'open' solutions that aren't particularly open, or unproven startups that may not exist next year.""", "AI implementation": """Artificial intelligence has become the telecom industry's latest silver bullet – a magical solution that will supposedly solve every problem from network optimization to customer service. The reality is somewhat less impressive: chatbots that frustrate customers, predictive analytics that predict everything except what actually happens, and automation systems that require more human intervention than the manual processes they replaced.""", "Edge computing": """Edge computing promised to bring processing power closer to users and enable revolutionary new applications. What it's actually delivered is a distributed mess of underutilized infrastructure that costs more to maintain than centralized alternatives. The killer applications that were supposed to justify the investment remain stubbornly theoretical, while operators struggle with the complexity of managing thousands of mini data centers.""", "Network automation": """Network automation was going to eliminate human error and reduce operational costs. Instead, it's created new categories of failures that are harder to diagnose and fix than the manual processes it replaced. The promised lights-out operations remain a distant dream, while the reality is more humans needed to manage the automation systems than were required for manual operations.""" } return random.choice(list(technical_scenarios.values())) def generate_article_content(self, title: str, topic: str) -> str: """Generate full article content in Iain Morris style""" opening = random.choice(self.generate_opening_scenarios()) cynical_obs = random.choice(self.generate_cynical_observations()) technical_content = self.generate_technical_content(topic) # Create article structure with multiple paragraphs paragraphs = [ opening, f"The latest quarterly results paint a picture that would make even the most optimistic analyst reach for the antacids. {cynical_obs}", technical_content, f"Industry insiders who spoke on condition of anonymity (because they value their careers more than honesty) confirmed what everyone already suspected but nobody wanted to say out loud. The gap between marketing promises and operational reality has reached proportions that would embarrass a carnival barker.", f"Meanwhile, executives continue to double down on strategies that have already proven ineffective, apparently operating under the delusion that repeating the same mistakes with greater enthusiasm will somehow produce different results. {random.choice(self.generate_cynical_observations())}", "The implications for the broader industry are clear: another expensive lesson in the dangers of believing your own marketing materials. Customers, shareholders, and employees will pay the price for management decisions that prioritized wishful thinking over engineering reality.", "As the dust settles on this latest corporate adventure, one thing remains certain: the industry's capacity for turning promising technologies into expensive disappointments shows no signs of diminishing. The only question is which overhyped initiative will be next to crash and burn." ] return "\n\n".join(paragraphs) def generate_training_example(self) -> Dict: """Generate a single training example""" # Select a topic and generate title topic = random.choice([ "5G deployment", "Open RAN adoption", "AI implementation", "edge computing", "network automation", "cloud migration", "digital transformation", "customer experience", "cost optimization", "security challenges", "regulatory compliance", "vendor management" ]) title = self.generate_provocative_title() content = self.generate_article_content(title, topic) # Create instruction based on title/topic instruction = f"Write a telecom industry news article about: {title}" return { "messages": [ { "role": "system", "content": self.system_prompt }, { "role": "user", "content": instruction }, { "role": "assistant", "content": f"# {title}\n\n{content}" } ] } def generate_training_examples(self, count: int = 100) -> List[Dict]: """Generate specified number of training examples""" examples = [] print(f"Generating {count} high-quality training examples...") for i in range(count): example = self.generate_training_example() examples.append(example) if (i + 1) % 10 == 0: print(f"Generated {i + 1}/{count} examples...") print(f"Successfully generated {len(examples)} training examples!") return examples def save_examples(self, examples: List[Dict], filename: str = "data/additional_training_examples.json"): """Save examples to JSON file""" with open(filename, 'w', encoding='utf-8') as f: json.dump(examples, f, indent=2, ensure_ascii=False) print(f"Saved {len(examples)} examples to {filename}") def merge_with_existing(self, new_examples: List[Dict], existing_file: str = "data/train_dataset.json", output_file: str = "data/expanded_train_dataset.json"): """Merge new examples with existing training data""" try: with open(existing_file, 'r', encoding='utf-8') as f: existing_examples = json.load(f) combined_examples = existing_examples + new_examples with open(output_file, 'w', encoding='utf-8') as f: json.dump(combined_examples, f, indent=2, ensure_ascii=False) print(f"Merged {len(new_examples)} new examples with {len(existing_examples)} existing examples") print(f"Total examples: {len(combined_examples)}") print(f"Saved to {output_file}") return combined_examples except FileNotFoundError: print(f"Existing file {existing_file} not found. Saving new examples only.") self.save_examples(new_examples, output_file) return new_examples def main(): """Main function to generate training examples""" generator = TrainingExampleGenerator() # Generate 100 new training examples new_examples = generator.generate_training_examples(100) # Save the new examples generator.save_examples(new_examples) # Merge with existing training data combined_examples = generator.merge_with_existing(new_examples) # Print summary statistics print(f"\nSummary:") print(f"- Generated 100 new high-quality training examples") print(f"- Each example follows Iain Morris's distinctive writing style") print(f"- Examples cover diverse telecom industry topics") print(f"- All examples use provocative titles and cynical commentary") print(f"- Technical content is accurate but presented with biting wit") # Show a sample example if new_examples: sample = new_examples[0] print(f"\nSample example:") print(f"Title: {sample['messages'][2]['content'].split('\\n\\n')[0][2:]}") print(f"Opening: {sample['messages'][2]['content'].split('\\n\\n')[1][:100]}...") if __name__ == "__main__": main()