File size: 2,259 Bytes
81acce0
 
 
2e267e9
81acce0
2e267e9
81acce0
886dbee
 
 
 
 
 
 
 
 
 
 
 
81acce0
886dbee
 
 
 
 
 
 
81acce0
886dbee
81acce0
886dbee
2e267e9
886dbee
 
 
 
 
 
81acce0
 
886dbee
 
 
81acce0
 
2e267e9
 
 
 
 
81acce0
 
2e267e9
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import pandas as pd
import numpy as np
from datetime import datetime
import os

def generate_enhanced_data_v3(num_samples=10000, output_path="enhanced_mantle_training.csv"):
    """Generate a synthetic, class-balanced risk dataset and write it to CSV.

    Rows are split evenly between the "Low" and "Moderate" classes
    (``num_samples // 3`` each); the "High" class receives the remainder so
    the total is exactly ``num_samples``. Per class, values are drawn from:

    * Low:      temperature 50-160,  duration 5-45,   risk_score in [0, 40)
    * Moderate: temperature 161-190, duration 46-90,  risk_score in [40, 70)
    * High:     temperature 191-200, duration 91-120, risk_score in [70, 100)

    Every row also carries an alert label ("Safe", "Risk" or "High Risk")
    and the wall-clock time at which the row was generated. Rows are
    shuffled before the DataFrame is built.

    Args:
        num_samples: Total number of rows to generate; must be >= 0.
        output_path: Destination CSV path. Missing parent directories are
            created. A bare file name is written to the current directory.

    Returns:
        The generated ``pandas.DataFrame`` with columns ``temperature``,
        ``duration``, ``risk_level``, ``risk_score``, ``alert``, ``timestamp``.

    Raises:
        ValueError: If ``num_samples`` is negative.
    """
    if num_samples < 0:
        raise ValueError(f"num_samples must be non-negative, got {num_samples}")

    samples_per_class = num_samples // 3

    # (risk_level, alert, row count, temperature range, duration range,
    # risk_score range). Integer ranges are half-open because
    # np.random.randint excludes the upper bound.
    class_specs = [
        ("Low", "Safe", samples_per_class, (50, 161), (5, 46), (0, 40)),
        ("Moderate", "Risk", samples_per_class, (161, 191), (46, 91), (40, 70)),
        # The last class absorbs the remainder of the integer division.
        ("High", "High Risk", num_samples - 2 * samples_per_class,
         (191, 201), (91, 121), (70, 100)),
    ]

    data = []
    for risk_level, alert, count, temp_range, duration_range, score_range in class_specs:
        for _ in range(count):
            temp = np.random.randint(*temp_range)
            duration = np.random.randint(*duration_range)
            risk_score = np.random.uniform(*score_range)
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            data.append([temp, duration, risk_level, risk_score, alert, timestamp])

    # Shuffle rows in place so the classes are interleaved in the output.
    np.random.shuffle(data)

    df = pd.DataFrame(
        data,
        columns=["temperature", "duration", "risk_level", "risk_score", "alert", "timestamp"],
    )

    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so only create the directory when there is one.
    target_dir = os.path.dirname(output_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    df.to_csv(output_path, index=False)
    print(f"Data generation complete! Dataset saved as '{output_path}'.")
    return df

if __name__ == "__main__":
    # Script entry point: resolve the CSV location relative to this file
    # (a "data" folder next to the script) so it works on Hugging Face,
    # then generate the 10,000-row dataset there.
    script_dir = os.path.dirname(__file__)
    output_path = os.path.join(script_dir, "data", "enhanced_mantle_training.csv")
    generate_enhanced_data_v3(num_samples=10000, output_path=output_path)