import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import joblib
from sklearn.metrics import root_mean_squared_error

from scripts.make_dataset import create_features
from scripts.naive_approach import get_column_specs, prepare_data, zeroshot_eval, simple_diagonal_averaging
from scripts.ml_approach import format_dataset

CONTEXT_LENGTH = 52
PREDICTION_LENGTH = 6

# Custom theme settings
st.set_page_config(
    page_title="Glucose Level Prediction App",
    page_icon="📊",
    layout="wide"
)

# Apply custom styling with CSS
st.markdown("""
""", unsafe_allow_html=True)

# Initialize session state variables if they don't exist
if 'combined_data' not in st.session_state:
    st.session_state.combined_data = None
if 'files_uploaded' not in st.session_state:
    st.session_state.files_uploaded = False
if 'data_processed' not in st.session_state:
    st.session_state.data_processed = False

# Title and description
st.title("Glucose Level Prediction App")
st.markdown("""
This app allows you to upload glucose measurements, food logs, and accelerometer data
to analyze patterns and predict glucose levels.
""")

# Choose data source
st.subheader("Choose Data Source")
data_option = st.selectbox(
    "Select how you'd like to provide input data:",
    ("Upload files", "Sample A", "Sample B")
)

glucose_data = None
food_data = None
accel_data = None
combined_data = None
show_tabs = False

if data_option == "Upload files":
    st.subheader("Upload Your Data Files")

    glucose_file = st.file_uploader("Upload Glucose Levels CSV", type=["csv"], key="glucose")
    food_file = st.file_uploader("Upload Food Logs CSV", type=["csv"], key="food")
    accel_file = st.file_uploader("Upload Accelerometer Data CSV", type=["csv"], key="accel")

    st.subheader("Patient Demographics")

    # Gender selection
    gender = st.selectbox("Select Patient Gender", options=["Female", "Male", "Other"], index=0)

    # HbA1c input
    hba1c = st.number_input("Enter HbA1c (%)", min_value=3.0, max_value=15.0, step=0.1)

    all_files_uploaded = (glucose_file is not None) and (food_file is not None) and (accel_file is not None)

    # Attempt to load files if they exist
    if glucose_file is not None:
        try:
            glucose_data = pd.read_csv(glucose_file)
            st.success("Glucose data loaded successfully!")
        except Exception as e:
            st.error(f"Error loading glucose data: {e}")
            glucose_data = None

    if food_file is not None:
        try:
            food_data = pd.read_csv(food_file)
            st.success("Food logs loaded successfully!")
        except Exception as e:
            st.error(f"Error loading food logs: {e}")
            food_data = None

    if accel_file is not None:
        try:
            accel_data = pd.read_csv(accel_file)
            st.success("Accelerometer data loaded successfully!")
        except Exception as e:
            st.error(f"Error loading accelerometer data: {e}")
            accel_data = None

    # Update the upload status in session state
    st.session_state.files_uploaded = all_files_uploaded

    # Show message if not all files are uploaded
    if not all_files_uploaded:
        st.warning("Please upload all three data files to enable data processing.")

    col1, col2, col3 = st.columns([1, 1, 1])
    with col2:
        # Add a button to process the data - disabled until all files are uploaded
        if st.button('Process Data', key='process_data_button', disabled=not all_files_uploaded):
            if all_files_uploaded:
                try:
                    # Call create_features with appropriate parameters
                    combined_data = create_features(
                        bg_df=glucose_data,
                        food_df=food_data,
                        acc_df=accel_data,
                        gender=gender,
                        hba1c=hba1c,
                        add_patient_id=True
                    )
                    st.session_state.combined_data = combined_data
                    st.session_state.data_processed = True
                    st.success("Data processed successfully!")
                    show_tabs = True
                except Exception as e:
                    st.error(f"Error processing data: {e}")
                    st.session_state.data_processed = False
                    show_tabs = False

    st.subheader("Expected File Formats:")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
**Glucose Levels CSV:**
- Timestamp column
- Glucose measurement values
""")
    with col2:
        st.markdown("""
**Food Logs CSV:**
- Timestamp column
- Carbohydrates
- Sugar
- Calories
""")
    with col3:
        st.markdown("""
**Accelerometer Data CSV:**
- Timestamp column
- Activity measurements
""")
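    # NOTE: Streamlit re-runs this script from the top on every widget interaction,
    # so the processed dataframe is kept in st.session_state and restored by the
    # check below. Without this, clicking a prediction button in the tabs would
    # discard the processed data and hide the tabs again on the next rerun.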
successfully!") show_tabs = True except Exception as e: st.error(f"Error processing data: {e}") st.session_state.data_processed = False show_tabs = False st.subheader("Expected File Formats:") col1, col2, col3 = st.columns(3) with col1: st.markdown(""" **Glucose Levels CSV:** - Timestamp column - Glucose measurement values """) with col2: st.markdown(""" **Food Logs CSV:** - Timestamp column - Carbohydrates - Sugar - Calories """) with col3: st.markdown(""" **Accelerometer Data CSV:** - Timestamp column - Activity measurements """) # Check if data was previously processed if st.session_state.data_processed and st.session_state.combined_data is not None: combined_data = st.session_state.combined_data show_tabs = True elif data_option == "Sample A": combined_data_path = 'data/processed/samples/sample_A.csv' combined_data = pd.read_csv(combined_data_path) st.session_state.combined_data = combined_data st.session_state.data_processed = True st.success("Sample A loaded successfully!") show_tabs = True elif data_option == "Sample B": combined_data_path = 'data/processed/samples/sample_B.csv' combined_data = pd.read_csv(combined_data_path) st.session_state.combined_data = combined_data st.session_state.data_processed = True st.success("Sample B loaded successfully!") show_tabs = True # Add some spacing st.write("") st.write("") # Only show tabs if sample data is loaded or user data has been successfully processed if show_tabs: # Create tabs for data exploration tab1, tab2, tab3 = st.tabs(["Naive Model", "Machine Learning Model", "Deep Learning Model"]) with tab1: st.subheader("Naive Model") if st.button('Make prediction', key='naive_button'): if combined_data is not None: # Add your naive model prediction code here try: # Call naive model prediction functions column_specs = get_column_specs() prepared_data = prepare_data(combined_data, column_specs["timestamp_column"]) train_file = 'data/processed/train_dataset.csv' train_data = pd.read_csv(train_file) train_data = prepare_data(train_data, column_specs["timestamp_column"]) predictions = zeroshot_eval( train_df=train_data, test_df=prepared_data, batch_size=8 ) # Get all step columns step_columns = [col for col in predictions["predictions_df"].columns if col.startswith("Glucose_step_")] # Apply simple diagonal averaging by patient final_results = simple_diagonal_averaging( predictions["predictions_df"], prepared_data, CONTEXT_LENGTH, step_columns ) # Visualize predictions vs actual values fig, ax = plt.subplots(figsize=(10, 6)) # Filter out zero predictions non_zero_mask = final_results['averaged_prediction'] != 0 filtered_results = final_results[non_zero_mask] # Plot predictions (only non-zero values) in green ax.plot(filtered_results['Timestamp'], filtered_results['averaged_prediction'], label='Predicted', alpha=0.7, color='#58A618') # Plot actual values (all data) in blue ax.plot(final_results['Timestamp'], final_results['Glucose'], label='Ground truth', alpha=0.7, color='#1f77b4') ax.set_title('Glucose Predictions vs Actual Values') ax.set_xlabel('Time') ax.set_ylabel('Glucose Level') ax.legend() st.pyplot(fig) y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True) y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True) # Filter out zero predictions non_zero_mask = y_pred != 0 y_true_filtered = y_true[non_zero_mask] y_pred_filtered = y_pred[non_zero_mask] if len(y_pred_filtered) > 0: rmse = np.sqrt(root_mean_squared_error(y_true_filtered, y_pred_filtered)) st.subheader("Performance Metrics") 
st.metric("AVERAGE RMSE", f"{rmse:.4f}") else: st.subheader("Performance Metrics") st.metric("AVERAGE RMSE", "N/A") except Exception as e: st.error(f"Error in naive model prediction: {e}") else: st.error("Data not available. Please try again.") with tab2: st.subheader("Machine Learning Model") if st.button('Make prediction', key='ml_button'): if combined_data is not None: X_test, y_test = format_dataset(combined_data, CONTEXT_LENGTH, PREDICTION_LENGTH) model_output_path = "models/xgb_model.pkl" xgb_model = joblib.load(model_output_path) y_test_pred = xgb_model.predict(X_test) final_results = simple_diagonal_averaging( pd.DataFrame(y_test_pred), combined_data, CONTEXT_LENGTH, pd.DataFrame(y_test_pred).columns ) # Visualize predictions vs actual values fig, ax = plt.subplots(figsize=(10, 6)) # Plot all actual values in blue ax.plot(final_results['Timestamp'], final_results['Glucose'], label='Ground truth', alpha=0.7, color='#1f77b4') # Replace zeros with NaN (which matplotlib will skip when plotting) plot_predictions = final_results['averaged_prediction'].copy() plot_predictions = plot_predictions.replace(0, float('nan')) # Plot predictions with NaN instead of zeros in green ax.plot(final_results['Timestamp'], plot_predictions, label='Predicted', alpha=0.7, color='#58A618') ax.set_title('Glucose Predictions vs Actual Values') ax.set_xlabel('Time') ax.set_ylabel('Glucose Level') ax.legend() st.pyplot(fig) # Calculate and display metrics for single patient y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True) y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True) # Filter out zero predictions non_zero_mask = y_pred != 0 y_true_filtered = y_true[non_zero_mask] y_pred_filtered = y_pred[non_zero_mask] if len(y_pred_filtered) > 0: rmse = np.sqrt(root_mean_squared_error(y_true_filtered, y_pred_filtered)) st.subheader("Performance Metrics") st.metric("AVERAGE RMSE", f"{rmse:.4f}") else: st.subheader("Performance Metrics") st.metric("AVERAGE RMSE", "N/A") else: st.error("Data not available. 
Please try again.") with tab3: st.subheader("Deep Learning Model") if st.button('Make prediction', key='dl_button'): if combined_data is not None: column_specs = get_column_specs() prepared_data = prepare_data(combined_data, column_specs["timestamp_column"]) train_file = 'data/processed/train_dataset.csv' train_data = pd.read_csv(train_file) train_data = prepare_data(train_data, column_specs["timestamp_column"]) predictions = zeroshot_eval( train_df=train_data, test_df=prepared_data, batch_size=8, model_path="iaravagni/ttm-finetuned-model" ) # Get all step columns step_columns = [col for col in predictions["predictions_df"].columns if col.startswith("Glucose_step_")] # Apply simple diagonal averaging by patient final_results = simple_diagonal_averaging( predictions["predictions_df"], prepared_data, CONTEXT_LENGTH, step_columns ) # Visualize predictions vs actual values fig, ax = plt.subplots(figsize=(10, 6)) # Filter out zero predictions non_zero_mask = final_results['averaged_prediction'] != 0 filtered_results = final_results[non_zero_mask] # Plot predictions (only non-zero values) in green ax.plot(filtered_results['Timestamp'], filtered_results['averaged_prediction'], label='Predicted', alpha=0.7, color='#58A618') # Plot actual values (all data) in blue ax.plot(final_results['Timestamp'], final_results['Glucose'], label='Ground truth', alpha=0.7, color='#1f77b4') ax.set_title('Glucose Predictions vs Actual Values') ax.set_xlabel('Time') ax.set_ylabel('Glucose Level') ax.legend() st.pyplot(fig) # Calculate and display metrics for single patient y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True) y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True) # Filter out zero predictions non_zero_mask = y_pred != 0 y_true_filtered = y_true[non_zero_mask] y_pred_filtered = y_pred[non_zero_mask] if len(y_pred_filtered) > 0: rmse = np.sqrt(root_mean_squared_error(y_true_filtered, y_pred_filtered)) st.subheader("Performance Metrics") st.metric("AVERAGE RMSE", f"{rmse:.4f}") else: st.subheader("Performance Metrics") st.metric("AVERAGE RMSE", "N/A") else: st.error("Data not available. Please try again.") else: st.info("Upload and process data or select a sample dataset to view prediction models.") # Add some spacing st.write("") st.write("") # App information and disclaimer st.markdown(""" --- ### About this App This application is designed to help analyze and predict glucose levels based on glucose measurements, food logs, and physical activity data. The app merges these datasets based on timestamps to identify patterns and make predictions. Please note that this is a demonstration tool and should not be used for medical decisions without consultation with healthcare professionals. """) # Add a footer with the custom color st.markdown(""" """, unsafe_allow_html=True)