File size: 17,954 Bytes
6e8eb41
 
 
 
 
 
 
 
40f01ae
 
 
6e8eb41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40daefb
6e8eb41
 
 
 
 
 
 
40daefb
6e8eb41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40daefb
6e8eb41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40daefb
6e8eb41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40daefb
6e8eb41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import joblib
from sklearn.metrics import root_mean_squared_error

from scripts.make_dataset import create_features
from scripts.naive_approach import get_column_specs, prepare_data, zeroshot_eval, simple_diagonal_averaging
from scripts.ml_approach import format_dataset

# Window sizes handed to the dataset-formatting and diagonal-averaging helpers
# used by the model tabs further down this script.
CONTEXT_LENGTH = 52
PREDICTION_LENGTH = 6

# Page-level settings; this is issued before any other Streamlit call in the script.
_PAGE_SETTINGS = {
    "page_title": "Glucose Level Prediction App",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)

# Stylesheet injected into the page: green (#58A618) accent on widgets,
# headings, links and buttons, a white app background, and custom tab styling.
_THEME_CSS = """
<style>
    /* Primary accent color */
    .stButton button, .stSelectbox, .stMultiselect, .stSlider, .stNumberInput {
        border-color: #58A618 !important;
    }
    .stProgress .st-bo {
        background-color: #58A618 !important;
    }
    .st-bq {
        color: #58A618 !important;
    }
    /* Header styling */
    h1, h2, h3 {
        color: #58A618 !important;
    }
    .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
        color: #58A618 !important;
    }
    /* Success messages */
    .element-container .stAlert.st-ae.st-af {
        border-color: #58A618 !important;
        color: #58A618 !important;
    }
    /* Link color */
    a {
        color: #58A618 !important;
    }
    /* Button color */
    .stButton>button {
        background-color: #58A618 !important;
        color: white !important;
    }
    /* Make background white */
    .stApp {
        background-color: white !important;
    }
    /* Tab styling */
    .stTabs [data-baseweb="tab-list"] {
        gap: 10px;
    }
    .stTabs [data-baseweb="tab"] {
        background-color: #f0f0f0;
        border-radius: 4px 4px 0 0;
        padding: 10px 16px;
        border: 1px solid #ccc;
    }
    .stTabs [data-baseweb="tab"][aria-selected="true"] {
        background-color: white;
        border-bottom: 3px solid #58A618;
    }
</style>
"""
# unsafe_allow_html lets the raw <style> element through to the page.
st.markdown(_THEME_CSS, unsafe_allow_html=True)

# Seed the session-state keys this script reads later. A key that already
# holds a value from an earlier rerun is left untouched.
_SESSION_DEFAULTS = (
    ('combined_data', None),
    ('files_uploaded', False),
    ('data_processed', False),
)
for _state_key, _initial_value in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value

# Page heading followed by a short introduction to the app.
_APP_TITLE = "Glucose Level Prediction App"
_APP_INTRO = """
This app allows you to upload glucose measurements, food logs, and accelerometer data 
to analyze patterns and predict glucose levels.
"""
st.title(_APP_TITLE)
st.markdown(_APP_INTRO)

# Let the user pick between uploading their own files and one of two samples;
# the chosen label drives the branching that follows.
_DATA_SOURCE_LABELS = ("Upload files", "Sample A", "Sample B")
st.subheader("Choose Data Source")
data_option = st.selectbox(
    "Select how you'd like to provide input data:",
    _DATA_SOURCE_LABELS,
)

# Per-rerun working variables. They stay None/False unless the branch for the
# selected data source fills them in; `combined_data` and `show_tabs` are read
# by the model-tab section later in the script.
glucose_data = None
food_data = None
accel_data = None
combined_data = None
show_tabs = False

# Bundled sample datasets, keyed by the label shown in the data-source selectbox.
_SAMPLE_DATA_PATHS = {
    "Sample A": 'data/processed/samples/sample_A.csv',
    "Sample B": 'data/processed/samples/sample_B.csv',
}


def _read_uploaded_csv(uploaded_file, description):
    """Parse one uploaded CSV and report the outcome in the UI.

    Args:
        uploaded_file: Value returned by ``st.file_uploader``; ``None`` when
            the user has not provided a file.
        description: Lower-case name of the dataset (e.g. ``"food logs"``)
            used to build the success and error messages.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when no file was given or
        the file could not be parsed (an error is shown in that case).
    """
    if uploaded_file is None:
        return None
    try:
        frame = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"Error loading {description}: {e}")
        return None
    st.success(f"{description.capitalize()} loaded successfully!")
    return frame


if data_option == "Upload files":
    st.subheader("Upload Your Data Files")

    glucose_file = st.file_uploader("Upload Glucose Levels CSV", type=["csv"], key="glucose")
    food_file = st.file_uploader("Upload Food Logs CSV", type=["csv"], key="food")
    accel_file = st.file_uploader("Upload Accelerometer Data CSV", type=["csv"], key="accel")

    st.subheader("Patient Demographics")

    # Gender selection
    gender = st.selectbox("Select Patient Gender", options=["Female", "Male", "Other"], index=0)

    # HbA1c input
    hba1c = st.number_input("Enter HbA1c (%)", min_value=3.0, max_value=15.0, step=0.1)

    all_files_uploaded = all(
        uploaded is not None for uploaded in (glucose_file, food_file, accel_file)
    )

    # Parse whatever has been uploaded so far; each call reports its own outcome.
    glucose_data = _read_uploaded_csv(glucose_file, "glucose data")
    food_data = _read_uploaded_csv(food_file, "food logs")
    accel_data = _read_uploaded_csv(accel_file, "accelerometer data")

    # A file can be uploaded yet fail to parse, leaving its DataFrame as None.
    # Processing is only offered when all three frames actually loaded, so
    # create_features never receives a None input.
    all_files_loaded = all(
        frame is not None for frame in (glucose_data, food_data, accel_data)
    )

    # Update the upload status in session state
    st.session_state.files_uploaded = all_files_uploaded

    # Show message if not all files are uploaded
    if not all_files_uploaded:
        st.warning("Please upload all three data files to enable data processing.")

    col1, col2, col3 = st.columns([1,1,1])

    with col2:
        # The button stays disabled until all three files are uploaded and parsed.
        if st.button('Process Data', key='process_data_button', disabled=not all_files_loaded):
            try:
                # Merge the three inputs and the demographics into one feature table.
                combined_data = create_features(
                    bg_df=glucose_data,
                    food_df=food_data,
                    acc_df=accel_data,
                    gender=gender,
                    hba1c=hba1c,
                    add_patient_id=True
                )
            except Exception as e:
                st.error(f"Error processing data: {e}")
                st.session_state.data_processed = False
                show_tabs = False
            else:
                st.session_state.combined_data = combined_data
                st.session_state.data_processed = True
                # Remember where the cached data came from so that sample data
                # is never mistaken for processed uploads on a later rerun.
                st.session_state.data_source = data_option
                st.success("Data processed successfully!")
                show_tabs = True

    st.subheader("Expected File Formats:")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown("""
        **Glucose Levels CSV:**
        - Timestamp column
        - Glucose measurement values
        """)

    with col2:
        st.markdown("""
        **Food Logs CSV:**
        - Timestamp column
        - Carbohydrates
        - Sugar
        - Calories
        """)

    with col3:
        st.markdown("""
        **Accelerometer Data CSV:**
        - Timestamp column
        - Activity measurements
        """)

    # Reuse data processed on an earlier rerun, but only if it came from
    # uploads: the sample branches write to the same session-state keys.
    if (
        st.session_state.data_processed
        and st.session_state.combined_data is not None
        and st.session_state.get('data_source') == "Upload files"
    ):
        combined_data = st.session_state.combined_data
        show_tabs = True

elif data_option in _SAMPLE_DATA_PATHS:
    combined_data_path = _SAMPLE_DATA_PATHS[data_option]
    try:
        combined_data = pd.read_csv(combined_data_path)
    except (OSError, ValueError) as e:
        # Missing/unreadable file (OSError) or empty/malformed CSV (pandas
        # raises ValueError subclasses): report it instead of crashing the app.
        st.error(f"Error loading {data_option}: {e}")
        combined_data = None
        st.session_state.data_processed = False
    else:
        st.session_state.combined_data = combined_data
        st.session_state.data_processed = True
        st.session_state.data_source = data_option
        st.success(f"{data_option} loaded successfully!")
        show_tabs = True

# Add some spacing
st.write("")
st.write("")


def _render_figure(fig, ax):
    """Apply the shared title, axis labels and legend, then draw the figure in the app."""
    ax.set_title('Glucose Predictions vs Actual Values')
    ax.set_xlabel('Time')
    ax.set_ylabel('Glucose Level')
    ax.legend()
    st.pyplot(fig)
    # pyplot keeps a reference to every figure it creates; close this one so
    # figures do not accumulate across button clicks and script reruns.
    plt.close(fig)


def _render_rmse(results):
    """Display the RMSE between the 'Glucose' and 'averaged_prediction' columns.

    Only rows from position CONTEXT_LENGTH onward are scored, and rows whose
    averaged prediction equals 0 are excluded. "N/A" is shown when no row
    remains.

    Args:
        results: DataFrame returned by ``simple_diagonal_averaging``.
    """
    y_true = results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True)
    y_pred = results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True)
    has_prediction = y_pred != 0

    st.subheader("Performance Metrics")
    if has_prediction.any():
        # root_mean_squared_error already returns the RMSE; taking another
        # square root would report the fourth root of the MSE.
        rmse = root_mean_squared_error(y_true[has_prediction], y_pred[has_prediction])
        st.metric("AVERAGE RMSE", f"{rmse:.4f}")
    else:
        st.metric("AVERAGE RMSE", "N/A")


def _run_zeroshot_forecast(data, **eval_kwargs):
    """Forecast with ``zeroshot_eval`` and render the plot and RMSE.

    Shared by the naive and deep learning tabs, which differ only in the
    extra keyword arguments forwarded to ``zeroshot_eval``.

    Args:
        data: Combined feature DataFrame for the selected data source.
        **eval_kwargs: Extra keyword arguments for ``zeroshot_eval``
            (for example ``model_path``).
    """
    column_specs = get_column_specs()
    timestamp_column = column_specs["timestamp_column"]
    prepared_data = prepare_data(data, timestamp_column)
    train_data = prepare_data(pd.read_csv('data/processed/train_dataset.csv'), timestamp_column)

    predictions = zeroshot_eval(
        train_df=train_data,
        test_df=prepared_data,
        batch_size=8,
        **eval_kwargs
    )
    predictions_df = predictions["predictions_df"]

    # Get all step columns
    step_columns = [col for col in predictions_df.columns if col.startswith("Glucose_step_")]

    # Apply simple diagonal averaging by patient
    final_results = simple_diagonal_averaging(
        predictions_df,
        prepared_data,
        CONTEXT_LENGTH,
        step_columns
    )

    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot predictions in green, skipping rows whose averaged prediction is 0
    predicted_rows = final_results[final_results['averaged_prediction'] != 0]
    ax.plot(predicted_rows['Timestamp'], predicted_rows['averaged_prediction'],
            label='Predicted', alpha=0.7, color='#58A618')

    # Plot actual values (all data) in blue
    ax.plot(final_results['Timestamp'], final_results['Glucose'],
            label='Ground truth', alpha=0.7, color='#1f77b4')

    _render_figure(fig, ax)
    _render_rmse(final_results)


def _run_xgb_forecast(data):
    """Forecast with the saved XGBoost model and render the plot and RMSE.

    Args:
        data: Combined feature DataFrame for the selected data source.
    """
    # Only the feature matrix is needed; the targets returned alongside it are unused.
    X_test, _ = format_dataset(data, CONTEXT_LENGTH, PREDICTION_LENGTH)

    # NOTE: joblib.load unpickles the file, so only trusted model files may be loaded.
    xgb_model = joblib.load("models/xgb_model.pkl")
    predictions_df = pd.DataFrame(xgb_model.predict(X_test))

    final_results = simple_diagonal_averaging(
        predictions_df,
        data,
        CONTEXT_LENGTH,
        predictions_df.columns
    )

    fig, ax = plt.subplots(figsize=(10, 6))

    # Plot all actual values in blue
    ax.plot(final_results['Timestamp'], final_results['Glucose'],
            label='Ground truth', alpha=0.7, color='#1f77b4')

    # Replace zeros with NaN (which matplotlib will skip when plotting)
    plot_predictions = final_results['averaged_prediction'].replace(0, float('nan'))
    ax.plot(final_results['Timestamp'], plot_predictions,
            label='Predicted', alpha=0.7, color='#58A618')

    _render_figure(fig, ax)
    _render_rmse(final_results)


def _predict_and_report(data, forecast, error_prefix, **forecast_kwargs):
    """Run a forecast callable, showing a UI error instead of crashing on failure.

    Args:
        data: Combined feature DataFrame, or ``None`` when nothing is loaded.
        forecast: Callable invoked as ``forecast(data, **forecast_kwargs)``.
        error_prefix: Text placed before the exception message on failure.
        **forecast_kwargs: Extra keyword arguments forwarded to ``forecast``.
    """
    if data is None:
        st.error("Data not available. Please try again.")
        return
    try:
        forecast(data, **forecast_kwargs)
    except Exception as e:
        # UI boundary: surface any failure (missing file, model error, ...) to the user.
        st.error(f"{error_prefix}: {e}")


# Only show tabs if sample data is loaded or user data has been successfully processed
if show_tabs:
    # One tab per model; each runs its forecast only when its button is pressed.
    tab1, tab2, tab3 = st.tabs(["Naive Model", "Machine Learning Model", "Deep Learning Model"])

    with tab1:
        st.subheader("Naive Model")

        if st.button('Make prediction', key='naive_button'):
            _predict_and_report(
                combined_data,
                _run_zeroshot_forecast,
                "Error in naive model prediction",
            )

    with tab2:
        st.subheader("Machine Learning Model")

        if st.button('Make prediction', key='ml_button'):
            _predict_and_report(
                combined_data,
                _run_xgb_forecast,
                "Error in machine learning model prediction",
            )

    with tab3:
        st.subheader("Deep Learning Model")

        if st.button('Make prediction', key='dl_button'):
            _predict_and_report(
                combined_data,
                _run_zeroshot_forecast,
                "Error in deep learning model prediction",
                model_path="iaravagni/ttm-finetuned-model",
            )
else:
    st.info("Upload and process data or select a sample dataset to view prediction models.")

# Two empty writes serve as vertical padding before the closing sections.
for _ in range(2):
    st.write("")

# Closing "about" section, including the not-for-medical-decisions disclaimer.
_ABOUT_MARKDOWN = """
---
### About this App

This application is designed to help analyze and predict glucose levels based on glucose measurements,
food logs, and physical activity data. The app merges these datasets based on timestamps to identify
patterns and make predictions.

Please note that this is a demonstration tool and should not be used for medical decisions without
consultation with healthcare professionals.

"""
st.markdown(_ABOUT_MARKDOWN)

# Footer bar fixed to the bottom of the page; the raw HTML/CSS needs
# unsafe_allow_html to be passed through.
_FOOTER_HTML = """
<style>
.footer {
    position: fixed;
    left: 0;
    bottom: 0;
    width: 100%;
    background-color: white;
    color: #58A618;
    text-align: center;
    padding: 10px;
    border-top: 2px solid #58A618;
}
</style>
<div class="footer">
    <p>Glucose Prediction Application © 2025</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)