# Hugging Face Space: iaravagni/BloodGlucosePrediction (status: Sleeping)
# File size: 17,954 Bytes

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import joblib
from sklearn.metrics import root_mean_squared_error

from scripts.make_dataset import create_features
from scripts.naive_approach import get_column_specs, prepare_data, zeroshot_eval, simple_diagonal_averaging
from scripts.ml_approach import format_dataset

# Window sizes handed to the prediction helpers below. CONTEXT_LENGTH is also
# the number of leading rows skipped when the metrics are computed.
CONTEXT_LENGTH, PREDICTION_LENGTH = 52, 6

# Browser-tab title/icon and a full-width layout for the page.
_page_settings = {
    "page_title": "Glucose Level Prediction App",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**_page_settings)

# Green-accent theme injected as raw CSS.
# NOTE(review): selectors such as `.st-bo` / `.st-bq` / `.st-ae.st-af` look
# like generated class names and may not survive a Streamlit upgrade — confirm.
_CUSTOM_CSS = """
<style>
    /* Primary accent color */
    .stButton button, .stSelectbox, .stMultiselect, .stSlider, .stNumberInput {
        border-color: #58A618 !important;
    }
    .stProgress .st-bo {
        background-color: #58A618 !important;
    }
    .st-bq {
        color: #58A618 !important;
    }
    /* Header styling */
    h1, h2, h3 {
        color: #58A618 !important;
    }
    .stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
        color: #58A618 !important;
    }
    /* Success messages */
    .element-container .stAlert.st-ae.st-af {
        border-color: #58A618 !important;
        color: #58A618 !important;
    }
    /* Link color */
    a {
        color: #58A618 !important;
    }
    /* Button color */
    .stButton>button {
        background-color: #58A618 !important;
        color: white !important;
    }
    /* Make background white */
    .stApp {
        background-color: white !important;
    }
    /* Tab styling */
    .stTabs [data-baseweb="tab-list"] {
        gap: 10px;
    }
    .stTabs [data-baseweb="tab"] {
        background-color: #f0f0f0;
        border-radius: 4px 4px 0 0;
        padding: 10px 16px;
        border: 1px solid #ccc;
    }
    .stTabs [data-baseweb="tab"][aria-selected="true"] {
        background-color: white;
        border-bottom: 3px solid #58A618;
    }
</style>
"""

# unsafe_allow_html is needed so the <style> tag is emitted as HTML.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# Seed the session keys this script relies on; values that already exist from
# an earlier rerun are left untouched.
_session_defaults = {
    'combined_data': None,
    'files_uploaded': False,
    'data_processed': False,
}
for _key, _default in _session_defaults.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default

# Page heading followed by a short description of the app.
st.title("Glucose Level Prediction App")
st.markdown(
    "\n"
    "This app allows you to upload glucose measurements, food logs, and accelerometer data \n"
    "to analyze patterns and predict glucose levels.\n"
)

# The user either uploads their own files or picks one of two samples.
st.subheader("Choose Data Source")
_data_source_choices = ("Upload files", "Sample A", "Sample B")
data_option = st.selectbox(
    "Select how you'd like to provide input data:",
    _data_source_choices,
)

# Placeholders; the data-source branch that follows fills in what it can.
glucose_data = food_data = accel_data = combined_data = None
show_tabs = False

# Pre-processed CSV used for each sample choice in the data-source selector.
_SAMPLE_PATHS = {
    "Sample A": 'data/processed/samples/sample_A.csv',
    "Sample B": 'data/processed/samples/sample_B.csv',
}


def _read_uploaded_csv(uploaded_file, label: str):
    """Parse one uploaded CSV and report the outcome in the UI.

    Args:
        uploaded_file: Value returned by ``st.file_uploader``; ``None`` when
            nothing has been uploaded for this slot.
        label: Dataset name used in the status messages, e.g.
            ``"Glucose data"``.

    Returns:
        The parsed ``DataFrame``, or ``None`` when no file was provided or the
        file could not be read.
    """
    if uploaded_file is None:
        return None
    try:
        frame = pd.read_csv(uploaded_file)
    except Exception as e:  # UI boundary: show any read failure to the user
        st.error(f"Error loading {label.lower()}: {e}")
        return None
    st.success(f"{label} loaded successfully!")
    return frame


if data_option == "Upload files":
    # A result cached while a sample was selected must not be presented as if
    # it came from the user's uploads, so drop it when entering this mode.
    if st.session_state.get('data_source') != "Upload files":
        st.session_state.combined_data = None
        st.session_state.data_processed = False
        st.session_state.data_source = "Upload files"

    st.subheader("Upload Your Data Files")

    glucose_file = st.file_uploader("Upload Glucose Levels CSV", type=["csv"], key="glucose")
    food_file = st.file_uploader("Upload Food Logs CSV", type=["csv"], key="food")
    accel_file = st.file_uploader("Upload Accelerometer Data CSV", type=["csv"], key="accel")

    st.subheader("Patient Demographics")

    # Gender selection
    gender = st.selectbox("Select Patient Gender", options=["Female", "Male", "Other"], index=0)

    # HbA1c input
    hba1c = st.number_input("Enter HbA1c (%)", min_value=3.0, max_value=15.0, step=0.1)

    all_files_uploaded = all(
        f is not None for f in (glucose_file, food_file, accel_file)
    )

    # Parse whatever has been uploaded so far; a failed parse yields None.
    glucose_data = _read_uploaded_csv(glucose_file, "Glucose data")
    food_data = _read_uploaded_csv(food_file, "Food logs")
    accel_data = _read_uploaded_csv(accel_file, "Accelerometer data")

    # Uploaded is not the same as usable: a file may be present yet unreadable.
    all_files_loaded = all(
        df is not None for df in (glucose_data, food_data, accel_data)
    )

    # Update the upload status in session state
    st.session_state.files_uploaded = all_files_uploaded

    if not all_files_uploaded:
        st.warning("Please upload all three data files to enable data processing.")
    elif not all_files_loaded:
        st.warning("Please fix the files that failed to load to enable data processing.")

    col1, col2, col3 = st.columns([1,1,1])

    with col2:
        # Processing stays disabled until all three files were parsed, so
        # create_features is never handed a missing frame.
        if st.button('Process Data', key='process_data_button', disabled=not all_files_loaded):
            try:
                combined_data = create_features(
                    bg_df=glucose_data,
                    food_df=food_data,
                    acc_df=accel_data,
                    gender=gender,
                    hba1c=hba1c,
                    add_patient_id=True
                )
            except Exception as e:
                st.error(f"Error processing data: {e}")
                st.session_state.data_processed = False
                show_tabs = False
            else:
                # Cache the result so it survives the reruns triggered by
                # later widget interactions.
                st.session_state.combined_data = combined_data
                st.session_state.data_processed = True
                st.success("Data processed successfully!")
                show_tabs = True

    st.subheader("Expected File Formats:")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown("""
        **Glucose Levels CSV:**
        - Timestamp column
        - Glucose measurement values
        """)

    with col2:
        st.markdown("""
        **Food Logs CSV:**
        - Timestamp column
        - Carbohydrates
        - Sugar
        - Calories
        """)

    with col3:
        st.markdown("""
        **Accelerometer Data CSV:**
        - Timestamp column
        - Activity measurements
        """)

    # Restore data processed on an earlier rerun of this upload session.
    if st.session_state.data_processed and st.session_state.combined_data is not None:
        combined_data = st.session_state.combined_data
        show_tabs = True

elif data_option in _SAMPLE_PATHS:
    combined_data_path = _SAMPLE_PATHS[data_option]
    # Remember where the cached data came from so upload mode can discard it.
    st.session_state.data_source = data_option
    try:
        combined_data = pd.read_csv(combined_data_path)
    except (OSError, ValueError) as e:
        # Missing file (OSError) or unparsable/empty CSV (ValueError family).
        st.error(f"Error loading {data_option}: {e}")
        combined_data = None
        st.session_state.combined_data = None
        st.session_state.data_processed = False
    else:
        st.session_state.combined_data = combined_data
        st.session_state.data_processed = True
        st.success(f"{data_option} loaded successfully!")
        show_tabs = True

# Two empty writes act as vertical padding before the model tabs.
for _ in range(2):
    st.write("")

# Only show tabs if sample data is loaded or user data has been successfully processed
if show_tabs:
    # Create tabs for data exploration
    tab1, tab2, tab3 = st.tabs(["Naive Model", "Machine Learning Model", "Deep Learning Model"])

    with tab1:
        # Naive model tab: run the default zeroshot_eval on the selected data,
        # plot the averaged predictions against ground truth, and report RMSE.
        st.subheader("Naive Model")

        if st.button('Make prediction', key='naive_button'):
            if combined_data is not None:
                try:
                    # Prepare the selected data and the training set the same way.
                    column_specs = get_column_specs()
                    prepared_data = prepare_data(combined_data, column_specs["timestamp_column"])
                    train_file = 'data/processed/train_dataset.csv'
                    train_data = pd.read_csv(train_file)
                    train_data = prepare_data(train_data, column_specs["timestamp_column"])
                    predictions = zeroshot_eval(
                        train_df=train_data,
                        test_df=prepared_data,
                        batch_size=8
                    )

                    # One column per forecast step.
                    step_columns = [
                        col for col in predictions["predictions_df"].columns
                        if col.startswith("Glucose_step_")
                    ]

                    # Collapse the per-step columns into one prediction per row.
                    final_results = simple_diagonal_averaging(
                        predictions["predictions_df"],
                        prepared_data,
                        CONTEXT_LENGTH,
                        step_columns
                    )

                    # Visualize predictions vs actual values
                    fig, ax = plt.subplots(figsize=(10, 6))
                    try:
                        # Zero predictions are excluded from the plot; presumably
                        # they mark rows without a forecast — confirm against
                        # simple_diagonal_averaging.
                        non_zero_mask = final_results['averaged_prediction'] != 0
                        filtered_results = final_results[non_zero_mask]

                        # Predictions (non-zero only) in green
                        ax.plot(filtered_results['Timestamp'], filtered_results['averaged_prediction'],
                                label='Predicted', alpha=0.7, color='#58A618')

                        # Actual values (all rows) in blue
                        ax.plot(final_results['Timestamp'], final_results['Glucose'],
                                label='Ground truth', alpha=0.7, color='#1f77b4')

                        ax.set_title('Glucose Predictions vs Actual Values')
                        ax.set_xlabel('Time')
                        ax.set_ylabel('Glucose Level')
                        ax.legend()

                        st.pyplot(fig)
                    finally:
                        # pyplot keeps every figure alive until it is closed;
                        # without this each click would leak one figure.
                        plt.close(fig)

                    # Score only the rows after the initial context window.
                    y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True)
                    y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True)

                    # Filter out zero predictions
                    non_zero_mask = y_pred != 0
                    y_true_filtered = y_true[non_zero_mask]
                    y_pred_filtered = y_pred[non_zero_mask]

                    st.subheader("Performance Metrics")
                    if len(y_pred_filtered) > 0:
                        # root_mean_squared_error already returns the RMSE;
                        # taking another square root would under-report it.
                        rmse = root_mean_squared_error(y_true_filtered, y_pred_filtered)
                        st.metric("AVERAGE RMSE", f"{rmse:.4f}")
                    else:
                        st.metric("AVERAGE RMSE", "N/A")

                except Exception as e:
                    st.error(f"Error in naive model prediction: {e}")
            else:
                st.error("Data not available. Please try again.")

    with tab2:
        # Machine-learning tab: predict with the saved XGBoost model, plot the
        # averaged predictions against ground truth, and report RMSE.
        st.subheader("Machine Learning Model")

        if st.button('Make prediction', key='ml_button'):
            if combined_data is not None:
                # Same error boundary as the naive tab, so a missing model file
                # or bad input shows a message instead of a traceback.
                try:
                    # Only the feature matrix is needed; targets are unused here.
                    X_test, _ = format_dataset(combined_data, CONTEXT_LENGTH, PREDICTION_LENGTH)

                    # NOTE: joblib.load unpickles the file; only load model
                    # artifacts that ship with the app.
                    model_output_path = "models/xgb_model.pkl"
                    xgb_model = joblib.load(model_output_path)

                    # Build the prediction frame once and reuse it.
                    predictions_df = pd.DataFrame(xgb_model.predict(X_test))

                    final_results = simple_diagonal_averaging(
                        predictions_df,
                        combined_data,
                        CONTEXT_LENGTH,
                        predictions_df.columns
                    )

                    # Visualize predictions vs actual values
                    fig, ax = plt.subplots(figsize=(10, 6))
                    try:
                        # Plot all actual values in blue
                        ax.plot(final_results['Timestamp'], final_results['Glucose'],
                                label='Ground truth', alpha=0.7, color='#1f77b4')

                        # Replace zeros with NaN (which matplotlib will skip when plotting)
                        plot_predictions = final_results['averaged_prediction'].copy()
                        plot_predictions = plot_predictions.replace(0, float('nan'))

                        # Plot predictions with NaN instead of zeros in green
                        ax.plot(final_results['Timestamp'], plot_predictions,
                                label='Predicted', alpha=0.7, color='#58A618')

                        ax.set_title('Glucose Predictions vs Actual Values')
                        ax.set_xlabel('Time')
                        ax.set_ylabel('Glucose Level')
                        ax.legend()

                        st.pyplot(fig)
                    finally:
                        # pyplot keeps every figure alive until it is closed;
                        # without this each click would leak one figure.
                        plt.close(fig)

                    # Score only the rows after the initial context window.
                    y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True)
                    y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True)

                    # Filter out zero predictions
                    non_zero_mask = y_pred != 0
                    y_true_filtered = y_true[non_zero_mask]
                    y_pred_filtered = y_pred[non_zero_mask]

                    st.subheader("Performance Metrics")
                    if len(y_pred_filtered) > 0:
                        # root_mean_squared_error already returns the RMSE;
                        # taking another square root would under-report it.
                        rmse = root_mean_squared_error(y_true_filtered, y_pred_filtered)
                        st.metric("AVERAGE RMSE", f"{rmse:.4f}")
                    else:
                        st.metric("AVERAGE RMSE", "N/A")

                except Exception as e:
                    st.error(f"Error in machine learning model prediction: {e}")

            else:
                st.error("Data not available. Please try again.")

    with tab3:
        # Deep-learning tab: run zeroshot_eval with the fine-tuned model, plot
        # the averaged predictions against ground truth, and report RMSE.
        st.subheader("Deep Learning Model")

        if st.button('Make prediction', key='dl_button'):
            if combined_data is not None:
                # Same error boundary as the naive tab, so a failed model load
                # or bad input shows a message instead of a traceback.
                try:
                    # Prepare the selected data and the training set the same way.
                    column_specs = get_column_specs()
                    prepared_data = prepare_data(combined_data, column_specs["timestamp_column"])

                    train_file = 'data/processed/train_dataset.csv'
                    train_data = pd.read_csv(train_file)
                    train_data = prepare_data(train_data, column_specs["timestamp_column"])
                    predictions = zeroshot_eval(
                        train_df=train_data,
                        test_df=prepared_data,
                        batch_size=8,
                        model_path="iaravagni/ttm-finetuned-model"
                    )

                    # One column per forecast step.
                    step_columns = [
                        col for col in predictions["predictions_df"].columns
                        if col.startswith("Glucose_step_")
                    ]

                    # Collapse the per-step columns into one prediction per row.
                    final_results = simple_diagonal_averaging(
                        predictions["predictions_df"],
                        prepared_data,
                        CONTEXT_LENGTH,
                        step_columns
                    )

                    # Visualize predictions vs actual values
                    fig, ax = plt.subplots(figsize=(10, 6))
                    try:
                        # Zero predictions are excluded from the plot; presumably
                        # they mark rows without a forecast — confirm against
                        # simple_diagonal_averaging.
                        non_zero_mask = final_results['averaged_prediction'] != 0
                        filtered_results = final_results[non_zero_mask]

                        # Predictions (non-zero only) in green
                        ax.plot(filtered_results['Timestamp'], filtered_results['averaged_prediction'],
                                label='Predicted', alpha=0.7, color='#58A618')

                        # Actual values (all rows) in blue
                        ax.plot(final_results['Timestamp'], final_results['Glucose'],
                                label='Ground truth', alpha=0.7, color='#1f77b4')

                        ax.set_title('Glucose Predictions vs Actual Values')
                        ax.set_xlabel('Time')
                        ax.set_ylabel('Glucose Level')
                        ax.legend()

                        st.pyplot(fig)
                    finally:
                        # pyplot keeps every figure alive until it is closed;
                        # without this each click would leak one figure.
                        plt.close(fig)

                    # Score only the rows after the initial context window.
                    y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True)
                    y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True)

                    # Filter out zero predictions
                    non_zero_mask = y_pred != 0
                    y_true_filtered = y_true[non_zero_mask]
                    y_pred_filtered = y_pred[non_zero_mask]

                    st.subheader("Performance Metrics")
                    if len(y_pred_filtered) > 0:
                        # root_mean_squared_error already returns the RMSE;
                        # taking another square root would under-report it.
                        rmse = root_mean_squared_error(y_true_filtered, y_pred_filtered)
                        st.metric("AVERAGE RMSE", f"{rmse:.4f}")
                    else:
                        st.metric("AVERAGE RMSE", "N/A")

                except Exception as e:
                    st.error(f"Error in deep learning model prediction: {e}")
            else:
                st.error("Data not available. Please try again.")
else:
    st.info("Upload and process data or select a sample dataset to view prediction models.")

# Two empty writes act as vertical padding above the closing section.
for _ in range(2):
    st.write("")

# Closing section: what the app does, plus the medical-use disclaimer.
_ABOUT_TEXT = """
---
### About this App

This application is designed to help analyze and predict glucose levels based on glucose measurements,
food logs, and physical activity data. The app merges these datasets based on timestamps to identify
patterns and make predictions.

Please note that this is a demonstration tool and should not be used for medical decisions without
consultation with healthcare professionals.

"""
st.markdown(_ABOUT_TEXT)

# Footer pinned to the bottom of the viewport, styled in the accent green.
_FOOTER_HTML = """
<style>
.footer {
    position: fixed;
    left: 0;
    bottom: 0;
    width: 100%;
    background-color: white;
    color: #58A618;
    text-align: center;
    padding: 10px;
    border-top: 2px solid #58A618;
}
</style>
<div class="footer">
    <p>Glucose Prediction Application © 2025</p>
</div>
"""

# unsafe_allow_html is needed so the <style> and <div> are emitted as HTML.
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)