import os
from datetime import datetime

import numpy as np
import pandas as pd

# Column order of the generated dataset.
_COLUMNS = ["temperature", "duration", "risk_level", "risk_score", "alert", "timestamp"]

# One entry per risk class:
# (risk_level, alert, temperature bounds, duration bounds, risk_score bounds).
# Temperature/duration bounds are passed to np.random.randint, so the upper
# bound is exclusive; score bounds are passed to np.random.uniform.
_RISK_CLASSES = [
    ("Low", "Safe", (50, 161), (5, 46), (0, 40)),
    ("Moderate", "Risk", (161, 191), (46, 91), (40, 70)),
    ("High", "High Risk", (191, 201), (91, 121), (70, 100)),
]


def _generate_rows(count, risk_level, alert, temp_bounds, duration_bounds, score_bounds):
    """Return ``count`` random rows for a single risk class."""
    rows = []
    for _ in range(count):
        # Draw order (temp, duration, score) is kept fixed so that seeding
        # np.random yields a reproducible dataset.
        temp = np.random.randint(*temp_bounds)
        duration = np.random.randint(*duration_bounds)
        risk_score = np.random.uniform(*score_bounds)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        rows.append([temp, duration, risk_level, risk_score, alert, timestamp])
    return rows


def generate_enhanced_data_v3(num_samples=10000, output_path="enhanced_mantle_training.csv"):
    """Generate a class-balanced synthetic risk dataset and save it as CSV.

    Rows are split roughly evenly across three classes; the "High" class
    absorbs the remainder when ``num_samples`` is not divisible by 3:

    * Low:      temperature 50-160,  duration 5-45,   risk_score in [0, 40)
    * Moderate: temperature 161-190, duration 46-90,  risk_score in [40, 70)
    * High:     temperature 191-200, duration 91-120, risk_score in [70, 100)

    Every row also carries an ``alert`` label ("Safe", "Risk" or "High Risk")
    and a ``timestamp`` string holding the local generation time.

    Args:
        num_samples: Total number of rows to generate. Must be >= 0.
        output_path: Destination CSV path. Missing parent directories are
            created; a bare filename is written to the current directory.

    Returns:
        The shuffled ``pandas.DataFrame`` that was written to ``output_path``.

    Raises:
        ValueError: If ``num_samples`` is negative.
    """
    if num_samples < 0:
        raise ValueError(f"num_samples must be non-negative, got {num_samples}")

    samples_per_class = num_samples // 3
    # The last class takes whatever is left so the total is exactly num_samples.
    counts = [
        samples_per_class,
        samples_per_class,
        num_samples - 2 * samples_per_class,
    ]

    data = []
    for count, (risk_level, alert, temp_bounds, duration_bounds, score_bounds) in zip(
        counts, _RISK_CLASSES
    ):
        data.extend(
            _generate_rows(count, risk_level, alert, temp_bounds, duration_bounds, score_bounds)
        )

    # Shuffle in place so the classes are interleaved rather than grouped.
    np.random.shuffle(data)

    df = pd.DataFrame(data, columns=_COLUMNS)

    # os.path.dirname returns "" for a bare filename, and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when one is named.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_path, index=False)

    print(f"Data generation complete! Dataset saved as '{output_path}'.")
    return df


if __name__ == "__main__":
    # Write into a "data" directory located next to this script.
    output_path = os.path.join(os.path.dirname(__file__), "data", "enhanced_mantle_training.csv")
    generate_enhanced_data_v3(10000, output_path)