Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import os | |
import joblib | |
from sklearn.metrics import root_mean_squared_error | |
from scripts.make_dataset import create_features | |
from scripts.naive_approach import get_column_specs, prepare_data, zeroshot_eval, simple_diagonal_averaging | |
from scripts.ml_approach import format_dataset | |
CONTEXT_LENGTH = 52 | |
PREDICTION_LENGTH = 6 | |
# Custom theme settings | |
st.set_page_config( | |
page_title="Glucose Level Prediction App", | |
page_icon="📊", | |
layout="wide" | |
) | |
# Apply custom styling with CSS | |
st.markdown(""" | |
<style> | |
/* Primary accent color */ | |
.stButton button, .stSelectbox, .stMultiselect, .stSlider, .stNumberInput { | |
border-color: #58A618 !important; | |
} | |
.stProgress .st-bo { | |
background-color: #58A618 !important; | |
} | |
.st-bq { | |
color: #58A618 !important; | |
} | |
/* Header styling */ | |
h1, h2, h3 { | |
color: #58A618 !important; | |
} | |
.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p { | |
color: #58A618 !important; | |
} | |
/* Success messages */ | |
.element-container .stAlert.st-ae.st-af { | |
border-color: #58A618 !important; | |
color: #58A618 !important; | |
} | |
/* Link color */ | |
a { | |
color: #58A618 !important; | |
} | |
/* Button color */ | |
.stButton>button { | |
background-color: #58A618 !important; | |
color: white !important; | |
} | |
/* Make background white */ | |
.stApp { | |
background-color: white !important; | |
} | |
/* Tab styling */ | |
.stTabs [data-baseweb="tab-list"] { | |
gap: 10px; | |
} | |
.stTabs [data-baseweb="tab"] { | |
background-color: #f0f0f0; | |
border-radius: 4px 4px 0 0; | |
padding: 10px 16px; | |
border: 1px solid #ccc; | |
} | |
.stTabs [data-baseweb="tab"][aria-selected="true"] { | |
background-color: white; | |
border-bottom: 3px solid #58A618; | |
} | |
</style> | |
""", unsafe_allow_html=True) | |
# Initialize session state variables if they don't exist | |
if 'combined_data' not in st.session_state: | |
st.session_state.combined_data = None | |
if 'files_uploaded' not in st.session_state: | |
st.session_state.files_uploaded = False | |
if 'data_processed' not in st.session_state: | |
st.session_state.data_processed = False | |
# Title and description | |
st.title("Glucose Level Prediction App") | |
st.markdown(""" | |
This app allows you to upload glucose measurements, food logs, and accelerometer data | |
to analyze patterns and predict glucose levels. | |
""") | |
# Choose data source | |
st.subheader("Choose Data Source") | |
data_option = st.selectbox( | |
"Select how you'd like to provide input data:", | |
("Upload files", "Sample A", "Sample B") | |
) | |
glucose_data = None | |
food_data = None | |
accel_data = None | |
combined_data = None | |
show_tabs = False | |
if data_option == "Upload files": | |
st.subheader("Upload Your Data Files") | |
glucose_file = st.file_uploader("Upload Glucose Levels CSV", type=["csv"], key="glucose") | |
food_file = st.file_uploader("Upload Food Logs CSV", type=["csv"], key="food") | |
accel_file = st.file_uploader("Upload Accelerometer Data CSV", type=["csv"], key="accel") | |
st.subheader("Patient Demographics") | |
# Gender selection | |
gender = st.selectbox("Select Patient Gender", options=["Female", "Male", "Other"], index=0) | |
# HbA1c input | |
hba1c = st.number_input("Enter HbA1c (%)", min_value=3.0, max_value=15.0, step=0.1) | |
all_files_uploaded = (glucose_file is not None) and (food_file is not None) and (accel_file is not None) | |
# Attempt to load files if they exist | |
if glucose_file is not None: | |
try: | |
glucose_data = pd.read_csv(glucose_file) | |
st.success("Glucose data loaded successfully!") | |
except Exception as e: | |
st.error(f"Error loading glucose data: {e}") | |
glucose_data = None | |
if food_file is not None: | |
try: | |
food_data = pd.read_csv(food_file) | |
st.success("Food logs loaded successfully!") | |
except Exception as e: | |
st.error(f"Error loading food logs: {e}") | |
food_data = None | |
if accel_file is not None: | |
try: | |
accel_data = pd.read_csv(accel_file) | |
st.success("Accelerometer data loaded successfully!") | |
except Exception as e: | |
st.error(f"Error loading accelerometer data: {e}") | |
accel_data = None | |
# Update the upload status in session state | |
st.session_state.files_uploaded = all_files_uploaded | |
# Show message if not all files are uploaded | |
if not all_files_uploaded: | |
st.warning("Please upload all three data files to enable data processing.") | |
col1, col2, col3 = st.columns([1,1,1]) | |
with col2: | |
# Add a button to process the data - disabled until all files are uploaded | |
if st.button('Process Data', key='process_data_button', disabled=not all_files_uploaded): | |
if all_files_uploaded: | |
try: | |
# Call create_features with appropriate parameters | |
combined_data = create_features( | |
bg_df=glucose_data, | |
food_df=food_data, | |
acc_df=accel_data, | |
gender=gender, | |
hba1c=hba1c, | |
add_patient_id=True | |
) | |
st.session_state.combined_data = combined_data | |
st.session_state.data_processed = True | |
st.success("Data processed successfully!") | |
show_tabs = True | |
except Exception as e: | |
st.error(f"Error processing data: {e}") | |
st.session_state.data_processed = False | |
show_tabs = False | |
st.subheader("Expected File Formats:") | |
col1, col2, col3 = st.columns(3) | |
with col1: | |
st.markdown(""" | |
**Glucose Levels CSV:** | |
- Timestamp column | |
- Glucose measurement values | |
""") | |
with col2: | |
st.markdown(""" | |
**Food Logs CSV:** | |
- Timestamp column | |
- Carbohydrates | |
- Sugar | |
- Calories | |
""") | |
with col3: | |
st.markdown(""" | |
**Accelerometer Data CSV:** | |
- Timestamp column | |
- Activity measurements | |
""") | |
# Check if data was previously processed | |
if st.session_state.data_processed and st.session_state.combined_data is not None: | |
combined_data = st.session_state.combined_data | |
show_tabs = True | |
elif data_option == "Sample A": | |
combined_data_path = 'data/processed/samples/sample_A.csv' | |
combined_data = pd.read_csv(combined_data_path) | |
st.session_state.combined_data = combined_data | |
st.session_state.data_processed = True | |
st.success("Sample A loaded successfully!") | |
show_tabs = True | |
elif data_option == "Sample B": | |
combined_data_path = 'data/processed/samples/sample_B.csv' | |
combined_data = pd.read_csv(combined_data_path) | |
st.session_state.combined_data = combined_data | |
st.session_state.data_processed = True | |
st.success("Sample B loaded successfully!") | |
show_tabs = True | |
# Add some spacing | |
st.write("") | |
st.write("") | |
# Only show tabs if sample data is loaded or user data has been successfully processed | |
if show_tabs: | |
# Create tabs for data exploration | |
tab1, tab2, tab3 = st.tabs(["Naive Model", "Machine Learning Model", "Deep Learning Model"]) | |
with tab1: | |
st.subheader("Naive Model") | |
if st.button('Make prediction', key='naive_button'): | |
if combined_data is not None: | |
# Add your naive model prediction code here | |
try: | |
# Call naive model prediction functions | |
column_specs = get_column_specs() | |
prepared_data = prepare_data(combined_data, column_specs["timestamp_column"]) | |
train_file = 'data/processed/train_dataset.csv' | |
train_data = pd.read_csv(train_file) | |
train_data = prepare_data(train_data, column_specs["timestamp_column"]) | |
predictions = zeroshot_eval( | |
train_df=train_data, | |
test_df=prepared_data, | |
batch_size=8 | |
) | |
# Get all step columns | |
step_columns = [col for col in predictions["predictions_df"].columns if col.startswith("Glucose_step_")] | |
# Apply simple diagonal averaging by patient | |
final_results = simple_diagonal_averaging( | |
predictions["predictions_df"], | |
prepared_data, | |
CONTEXT_LENGTH, | |
step_columns | |
) | |
# Visualize predictions vs actual values | |
fig, ax = plt.subplots(figsize=(10, 6)) | |
# Filter out zero predictions | |
non_zero_mask = final_results['averaged_prediction'] != 0 | |
filtered_results = final_results[non_zero_mask] | |
# Plot predictions (only non-zero values) in green | |
ax.plot(filtered_results['Timestamp'], filtered_results['averaged_prediction'], | |
label='Predicted', alpha=0.7, color='#58A618') | |
# Plot actual values (all data) in blue | |
ax.plot(final_results['Timestamp'], final_results['Glucose'], | |
label='Ground truth', alpha=0.7, color='#1f77b4') | |
ax.set_title('Glucose Predictions vs Actual Values') | |
ax.set_xlabel('Time') | |
ax.set_ylabel('Glucose Level') | |
ax.legend() | |
st.pyplot(fig) | |
y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True) | |
y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True) | |
# Filter out zero predictions | |
non_zero_mask = y_pred != 0 | |
y_true_filtered = y_true[non_zero_mask] | |
y_pred_filtered = y_pred[non_zero_mask] | |
if len(y_pred_filtered) > 0: | |
rmse = np.sqrt(root_mean_squared_error(y_true_filtered, y_pred_filtered)) | |
st.subheader("Performance Metrics") | |
st.metric("AVERAGE RMSE", f"{rmse:.4f}") | |
else: | |
st.subheader("Performance Metrics") | |
st.metric("AVERAGE RMSE", "N/A") | |
except Exception as e: | |
st.error(f"Error in naive model prediction: {e}") | |
else: | |
st.error("Data not available. Please try again.") | |
with tab2: | |
st.subheader("Machine Learning Model") | |
if st.button('Make prediction', key='ml_button'): | |
if combined_data is not None: | |
X_test, y_test = format_dataset(combined_data, CONTEXT_LENGTH, PREDICTION_LENGTH) | |
model_output_path = "models/xgb_model.pkl" | |
xgb_model = joblib.load(model_output_path) | |
y_test_pred = xgb_model.predict(X_test) | |
final_results = simple_diagonal_averaging( | |
pd.DataFrame(y_test_pred), | |
combined_data, | |
CONTEXT_LENGTH, | |
pd.DataFrame(y_test_pred).columns | |
) | |
# Visualize predictions vs actual values | |
fig, ax = plt.subplots(figsize=(10, 6)) | |
# Plot all actual values in blue | |
ax.plot(final_results['Timestamp'], final_results['Glucose'], | |
label='Ground truth', alpha=0.7, color='#1f77b4') | |
# Replace zeros with NaN (which matplotlib will skip when plotting) | |
plot_predictions = final_results['averaged_prediction'].copy() | |
plot_predictions = plot_predictions.replace(0, float('nan')) | |
# Plot predictions with NaN instead of zeros in green | |
ax.plot(final_results['Timestamp'], plot_predictions, | |
label='Predicted', alpha=0.7, color='#58A618') | |
ax.set_title('Glucose Predictions vs Actual Values') | |
ax.set_xlabel('Time') | |
ax.set_ylabel('Glucose Level') | |
ax.legend() | |
st.pyplot(fig) | |
# Calculate and display metrics for single patient | |
y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True) | |
y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True) | |
# Filter out zero predictions | |
non_zero_mask = y_pred != 0 | |
y_true_filtered = y_true[non_zero_mask] | |
y_pred_filtered = y_pred[non_zero_mask] | |
if len(y_pred_filtered) > 0: | |
rmse = np.sqrt(root_mean_squared_error(y_true_filtered, y_pred_filtered)) | |
st.subheader("Performance Metrics") | |
st.metric("AVERAGE RMSE", f"{rmse:.4f}") | |
else: | |
st.subheader("Performance Metrics") | |
st.metric("AVERAGE RMSE", "N/A") | |
else: | |
st.error("Data not available. Please try again.") | |
with tab3: | |
st.subheader("Deep Learning Model") | |
if st.button('Make prediction', key='dl_button'): | |
if combined_data is not None: | |
column_specs = get_column_specs() | |
prepared_data = prepare_data(combined_data, column_specs["timestamp_column"]) | |
train_file = 'data/processed/train_dataset.csv' | |
train_data = pd.read_csv(train_file) | |
train_data = prepare_data(train_data, column_specs["timestamp_column"]) | |
predictions = zeroshot_eval( | |
train_df=train_data, | |
test_df=prepared_data, | |
batch_size=8, | |
model_path="iaravagni/ttm-finetuned-model" | |
) | |
# Get all step columns | |
step_columns = [col for col in predictions["predictions_df"].columns if col.startswith("Glucose_step_")] | |
# Apply simple diagonal averaging by patient | |
final_results = simple_diagonal_averaging( | |
predictions["predictions_df"], | |
prepared_data, | |
CONTEXT_LENGTH, | |
step_columns | |
) | |
# Visualize predictions vs actual values | |
fig, ax = plt.subplots(figsize=(10, 6)) | |
# Filter out zero predictions | |
non_zero_mask = final_results['averaged_prediction'] != 0 | |
filtered_results = final_results[non_zero_mask] | |
# Plot predictions (only non-zero values) in green | |
ax.plot(filtered_results['Timestamp'], filtered_results['averaged_prediction'], | |
label='Predicted', alpha=0.7, color='#58A618') | |
# Plot actual values (all data) in blue | |
ax.plot(final_results['Timestamp'], final_results['Glucose'], | |
label='Ground truth', alpha=0.7, color='#1f77b4') | |
ax.set_title('Glucose Predictions vs Actual Values') | |
ax.set_xlabel('Time') | |
ax.set_ylabel('Glucose Level') | |
ax.legend() | |
st.pyplot(fig) | |
# Calculate and display metrics for single patient | |
y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True) | |
y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True) | |
# Filter out zero predictions | |
non_zero_mask = y_pred != 0 | |
y_true_filtered = y_true[non_zero_mask] | |
y_pred_filtered = y_pred[non_zero_mask] | |
if len(y_pred_filtered) > 0: | |
rmse = np.sqrt(root_mean_squared_error(y_true_filtered, y_pred_filtered)) | |
st.subheader("Performance Metrics") | |
st.metric("AVERAGE RMSE", f"{rmse:.4f}") | |
else: | |
st.subheader("Performance Metrics") | |
st.metric("AVERAGE RMSE", "N/A") | |
else: | |
st.error("Data not available. Please try again.") | |
else: | |
st.info("Upload and process data or select a sample dataset to view prediction models.") | |
# Add some spacing | |
st.write("") | |
st.write("") | |
# App information and disclaimer | |
st.markdown(""" | |
--- | |
### About this App | |
This application is designed to help analyze and predict glucose levels based on glucose measurements, | |
food logs, and physical activity data. The app merges these datasets based on timestamps to identify | |
patterns and make predictions. | |
Please note that this is a demonstration tool and should not be used for medical decisions without | |
consultation with healthcare professionals. | |
""") | |
# Add a footer with the custom color | |
st.markdown(""" | |
<style> | |
.footer { | |
position: fixed; | |
left: 0; | |
bottom: 0; | |
width: 100%; | |
background-color: white; | |
color: #58A618; | |
text-align: center; | |
padding: 10px; | |
border-top: 2px solid #58A618; | |
} | |
</style> | |
<div class="footer"> | |
<p>Glucose Prediction Application © 2025</p> | |
</div> | |
""", unsafe_allow_html=True) |