Spaces:
Build error
Build error
Upload 11 files
Browse files- Utility/__pycache__/data_loader.cpython-312.pyc +0 -0
- Utility/data_loader.py +26 -0
- app.py +101 -0
- merged_df.csv +0 -0
- pages/Dashboard.py +338 -0
- pages/predict page.py +204 -0
- requirements.txt +11 -0
- sample_submission.csv +7 -0
- test_series.parquet +3 -0
- train_events.csv +0 -0
- train_series.parquet +3 -0
Utility/__pycache__/data_loader.cpython-312.pyc
ADDED
|
Binary file (1.64 kB). View file
|
|
|
Utility/data_loader.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
import pandas as pd
import pyarrow.dataset as ds


# Load and cache data
@st.cache_data
def load_data(n_rows=1_000_000):
    """Return the first *n_rows* rows of train_series.parquet as a DataFrame.

    Uses a pyarrow dataset scanner so only the requested rows are
    materialized instead of reading the whole (very large) parquet file
    into memory.
    """
    dataset = ds.dataset("train_series.parquet", format="parquet")
    table = dataset.scanner().head(n_rows)
    return table.to_pandas()


def load_train_series(n_rows=1_000_000):
    """Return the cached training-series sample.

    Backward-compatible wrapper around :func:`load_data`. Caching already
    happens inside load_data, so the redundant second @st.cache_data layer
    that used to wrap this function has been removed.
    """
    return load_data(n_rows)


@st.cache_data
def load_train_events():
    """Load the labelled sleep events (onset/wakeup) CSV."""
    return pd.read_csv("train_events.csv")


@st.cache_data
def load_sample_submission():
    """Load the sample-submission CSV (expected output format)."""
    return pd.read_csv("sample_submission.csv")


@st.cache_data
def load_test_series():
    """Load the (small) test series parquet file."""
    return pd.read_parquet("test_series.parquet")
app.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
import pandas as pd
from Utility.data_loader import (
    load_train_series, load_train_events,
    load_sample_submission, load_test_series
)

st.set_page_config(page_title="Sleep Detection", layout="wide")
st.title("Sleep Detection")

st.markdown("""
### 📊 About the App

This **Sleep Detection App** uses sensor data collected over time to predict sleep-related events such as *onset* or *wake-up*. The application allows users to analyze sleep patterns based on movement data and provides predictions using a machine learning model trained on labeled sensor events.

---

### 🧾 Data Description

Each row in the dataset represents a time-stamped sensor reading with the following key columns:

- **series_id**: Unique identifier for a sleep session or user.
- **step**: Sequence number of the reading.
- **sensor_timestamp**: The time when the sensor reading was recorded.
- **anglez**: Z-axis body orientation angle (used as a feature).
- **enmo**: Euclidean Norm Minus One – a movement magnitude metric (used as a feature).
- **night**: Night identifier (used to separate sessions).
- **event**: The sleep-related label (e.g., `onset`, `wake`) indicating the event type.
- **event_timestamp**: Timestamp of the actual sleep event (used to calculate sleep duration).

---

### 🤖 App Capabilities

- Displays raw sensor data and sleep event counts.
- Trains an ML model (XGBoost) using movement features (`anglez`, `enmo`) to predict sleep events.
- Allows real-time prediction of sleep events based on user input.
- Displays evaluation metrics: **Accuracy**, **F1 Score**, **ROC AUC Score**.

---
""")


# --- Sidebar Radio Button ---
st.header("Select Dataset to View")
# BUGFIX: "Sample Submission" had a handler below but was missing from the
# options tuple, which made its elif branch unreachable.
option = st.radio(
    "Choose a dataset:",
    ("Train Events", "Train Series", "Sample Submission", "Test Series", "Summary")
)

# --- Load and Show Data Based on Selection ---
df = None

if option == "Train Events":
    df = load_train_events()
    st.subheader("Train Events")
    st.dataframe(df.head())

elif option == "Sample Submission":
    df = load_sample_submission()
    st.subheader("Sample Submission")
    st.dataframe(df.head())

elif option == "Train Series":
    df = load_train_series()
    st.subheader("Train Series (1M rows sample)")
    st.dataframe(df.head())

elif option == "Test Series":
    df = load_test_series()
    st.subheader("Test Series")
    st.dataframe(df.head())

elif option == "Summary":
    st.subheader("Summary of All Key Datasets")

    with st.expander("📄 Train Events"):
        df_events = load_train_events()
        st.dataframe(df_events.head())
        st.write("Summary:")
        st.dataframe(df_events.describe(include="all"))

    with st.expander("📄 Sample Submission"):
        df_sample = load_sample_submission()
        st.dataframe(df_sample.head())
        st.write("Summary:")
        st.dataframe(df_sample.describe(include="all"))

    with st.expander("📄 Train Series"):
        df_series = load_train_series()
        st.dataframe(df_series.head())
        st.write("Summary:")
        st.dataframe(df_series.describe())

    with st.expander("📄 Test Series"):
        df_test = load_test_series()
        st.dataframe(df_test.head())
        st.write("Summary:")
        st.dataframe(df_test.describe())
merged_df.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pages/Dashboard.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from Utility.data_loader import load_train_series, load_train_events, load_sample_submission, load_test_series
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBClassifier
import xgboost as xgb
import numpy as np


@st.cache_data
def load_sampled_data():
    """Load the labelled sleep-events CSV.

    The heavy parquet loads that used to live here were disabled; only the
    events CSV is needed for the step-distribution boxplot below.
    """
    return pd.read_csv("train_events.csv")


df2 = load_sampled_data()

# Pre-merged sensor + event data, prepared offline to avoid joining the
# multi-GB parquet inside the app.
merged_df = pd.read_csv("merged_df.csv")

# Rename the merge-suffixed timestamp columns if they exist.
if 'timestamp_x' in merged_df.columns:
    merged_df.rename(columns={'timestamp_x': 'sensor_timestamp'}, inplace=True)
if 'timestamp_y' in merged_df.columns:
    merged_df.rename(columns={'timestamp_y': 'event_timestamp'}, inplace=True)

st.title("📊 Step Distribution Analysis")

# Layout: two equal-width columns — boxplot on the left, insights on the right.
col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("📦 Boxplot of Step")
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.boxplot(x=df2['step'], ax=ax, color='steelblue')
    ax.set_title("Distribution of Step Count", fontsize=14)
    ax.set_xlabel("Step", fontsize=12)
    st.pyplot(fig)

with col2:
    st.subheader("🧠 Insights from the Boxplot")
    st.markdown("""
<small>
<b>Central Tendency:</b><br>
- The <b>median</b> is close to the center of the box, suggesting a fairly symmetric distribution within the interquartile range (IQR).<br>
<b>Spread:</b><br>
- A <b>wide IQR</b> indicates significant variability in the step counts across sessions.<br>
<b>Outliers:</b><br>
- The <b>dots on the right</b> are outliers — representing very high step counts.<br>
- These could reflect either:<br>
- <b>Legitimate long-duration recordings</b><br>
- Or <b>data quality issues</b> (e.g., duplication or sensor errors)
<b>Distribution Shape:</b><br>
- A <b>longer left whisker</b> implies a <b>left-skewed</b> distribution.<br>
- Most sessions have <b>lower step values</b>, with a few very high outliers.
</small>
""", unsafe_allow_html=True)


# Full merged frame; swap in merged_df.sample(n=50000) to downsample for speed.
df_sample = merged_df

st.subheader("Scatter Plot: anglez vs enmo")

col1, col2 = st.columns([1, 1])

with col1:
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.scatterplot(x='anglez', y='enmo', data=df_sample, ax=ax)
    ax.set_title("Scatter Plot: anglez vs enmo")
    st.pyplot(fig)

with col2:
    st.markdown("""
<small>
<b>1. Clustered Points:</b> Most `enmo` values are near 0, suggesting low movement.<br>
<b>2. Symmetry:</b> Spread is balanced on both sides of anglez (±), indicating no directional bias.<br>
<b>3. Weak Correlation:</b> No visible trend, suggesting independence between `anglez` and `enmo`.<br>
<b>4. Outliers:</b> A few high `enmo` points may indicate sudden or intense movement.<br>
<b>5. Interpretation:</b> Most data reflects light activity or rest, regardless of body orientation.
</small>
""", unsafe_allow_html=True)


col1, col2 = st.columns([1, 1])

# Column 1: Pair Plot
with col1:
    st.subheader("📈 Pair Plot of Features")
    fig = sns.pairplot(merged_df[['anglez', 'enmo', 'step']])
    st.pyplot(fig)

# Column 2: Insights
with col2:
    st.subheader("🧠 Insights from Pair Plot")
    st.markdown("""
<div style='font-size: 14px'>

### 📊 Distribution Insights:
- **anglez**: Symmetric distribution peaking near -50 to 0.
- **enmo**: Right-skewed, most values below 0.1.
- **step**: Right-skewed, with a few large outliers.

### 🔁 Pairwise Relationships:
- **anglez vs enmo**: No clear trend; cone-like shape.
- **anglez vs step**: No correlation; looks uniformly scattered.
- **enmo vs step**: Clustered at low values. High steps sometimes with low enmo.

### 💡 Summary:
- Features appear largely **uncorrelated**.
- Helps identify **data distributions** and potential **outliers**.
- Can assist in **feature selection/engineering**.

</div>
""", unsafe_allow_html=True)

# Side-by-side univariate distributions of the two model features.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

sns.histplot(df_sample['anglez'], kde=True, bins=50, ax=axes[0])
axes[0].set_title("Distribution of anglez")

sns.histplot(df_sample['enmo'], kde=True, bins=50, ax=axes[1])
axes[1].set_title("Distribution of enmo")

plt.tight_layout()
st.pyplot(fig)

# Show insights side by side
col1, col2 = st.columns(2)

with col1:
    st.markdown("""
<div style='font-size: 14px'>
<h3> 📈 Distribution of `anglez`: </h3>
- The distribution is **roughly symmetric**, centered around **-50 to 0**.
- It resembles a **left-heavy bell shape**, suggesting:
- Most sensor angles were **tilted negatively**.
- Indicates a **natural resting position** or specific posture.
</div>
""", unsafe_allow_html=True)

with col2:
    st.markdown("""
<div style='font-size: 14px'>
<h3> 📉 Distribution of `enmo`: </h3>
- Highly **right-skewed** (sharp peak near zero).
- The majority of `enmo` values are **very small** (< 0.05), indicating:
- **Minimal movement or low activity** in most sessions.
- Few data points reflect **moderate to high movement**.
</div>
""", unsafe_allow_html=True)


st.subheader("Multicollinearity Check - Correlation Matrix")

# Select relevant features
features = ['anglez', 'enmo', 'step', 'night']
df_subset = merged_df[features]

# Calculate correlation matrix
corr_matrix = df_subset.corr()

# Create plot
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, fmt=".3f", ax=ax)
ax.set_title("Correlation Matrix")

# Layout in two columns: heatmap left, textual insights right.
col1, col2 = st.columns(2)

with col1:
    st.pyplot(fig)

with col2:
    st.markdown("""
### 🔍 Insights from Correlation Matrix

- **`anglez` & `enmo`**:
🔸 Weak negative correlation (**-0.11**) — suggests minimal linear relationship.

- **`step` & `night`**:
⚠️ Perfect correlation (**1.00**) — indicates **redundancy**, likely representing the same event in different forms.

- **Overall**:
✅ Low multicollinearity across most features — safe for modeling.
📝 Recommend removing either `step` or `night` to reduce feature duplication.
""")


# Encode categorical columns.
# BUGFIX: a single LabelEncoder instance used to be fit twice (series_id,
# then event), silently clobbering the first fitted mapping. Use one
# encoder per column so each mapping remains recoverable.
le_series = LabelEncoder()
merged_df['series_id'] = le_series.fit_transform(merged_df['series_id'])
le_event = LabelEncoder()
merged_df['event'] = le_event.fit_transform(merged_df['event'])

# Drop columns with string/datetime values or redundant identifiers.
drop_cols = ['sensor_timestamp', 'event_timestamp', 'night', 'step', 'sleep_duration_hrs', 'series_id']
df_cleaned = merged_df.drop(columns=[col for col in drop_cols if col in merged_df.columns])

# Ensure only numeric features in X
X = df_cleaned.drop('event', axis=1).select_dtypes(include=[np.number])
y = merged_df['event']

# Split into train/test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=27)

# Train an XGBoost classifier. NOTE: use_label_encoder was deprecated and
# removed in XGBoost 2.x, so it is no longer passed here.
xgb_model = XGBClassifier(eval_metric='logloss')
xgb_model.fit(X_train, y_train)

# Plot feature importance
fig, ax = plt.subplots(figsize=(6, 4))
xgb.plot_importance(xgb_model, ax=ax)
ax.set_title("XGBoost Feature Importance")

# Single subheader (previously "Feature Importance" and "XGBoost Feature
# Importance" were both emitted back to back).
st.subheader("XGBoost Feature Importance")

col1, col2 = st.columns(2)

# Column 1: Plot
with col1:
    st.pyplot(fig)
    st.markdown("""
#### 🚫 Low-Impact Features:
- Features like `step` and `night` (excluded in this plot) showed **minimal or redundant contribution**.
- 🔁 You may consider **removing** them to simplify the model.
""")

# Column 2: Insights
with col2:
    st.markdown("""
<small>
<h3> 🔍 XGBoost Feature Importance: Key Insights </h3>

#### 📌 Top Features:
- 🔹 **`anglez`** — Highest importance score (**1557**)
- 🔹 **`enmo`** — Close second with score (**1546**)

#### ✅ Summary:
- Both `anglez` and `enmo` contribute **significantly** to the model.
- Their high scores reflect **strong influence** in predicting the target variable.

#### 💡 Interpretation:
- These features likely capture **activity level** or **sleep posture** patterns.
- Keeping both is **recommended** for accurate classification.
</small>

""", unsafe_allow_html=True)
pages/predict page.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# pages/predict page.py — trains a small XGBoost model on the merged data and
# exposes an interactive sleep-event predictor plus evaluation metrics.
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from xgboost import XGBClassifier


st.title("🧠 Sleep Event Prediction")

# --- Load and preprocess data ---
merged_df = pd.read_csv("merged_df.csv")
st.subheader("Raw Data Sample")
st.dataframe(merged_df.head())

# Drop nulls in important columns
merged_df = merged_df.dropna(subset=['night', 'event', 'event_timestamp'])

# Convert timestamps (ISO-8601 with timezone offset, normalized to UTC)
merged_df['event_timestamp'] = pd.to_datetime(merged_df['event_timestamp'], format='%Y-%m-%dT%H:%M:%S%z', utc=True)
merged_df['sensor_timestamp'] = pd.to_datetime(merged_df['sensor_timestamp'], format='%Y-%m-%dT%H:%M:%S%z', utc=True)

# Calculate duration between sensor reading and the labelled event, in hours.
merged_df['sleep_duration_hrs'] = (merged_df['sensor_timestamp'] - merged_df['event_timestamp']).dt.total_seconds() / 3600

# Encode categorical columns — one encoder per column so the event mapping
# stays available for inverse_transform at prediction time.
le_event = LabelEncoder()
merged_df['event_encoded'] = le_event.fit_transform(merged_df['event'])

le_series = LabelEncoder()
merged_df['series_id_encoded'] = le_series.fit_transform(merged_df['series_id'])

# Select features
X = merged_df[['anglez', 'enmo']]
y = merged_df['event_encoded']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model. NOTE: use_label_encoder was deprecated and removed in
# XGBoost 2.x, so it is no longer passed here.
model = XGBClassifier(eval_metric='logloss')
model.fit(X_train_scaled, y_train)

# Evaluate model
y_pred = model.predict(X_test_scaled)
y_proba = model.predict_proba(X_test_scaled)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Handle binary or multiclass AUC
if y_proba.shape[1] == 2:
    roc = roc_auc_score(y_test, y_proba[:, 1])
else:
    roc = roc_auc_score(y_test, y_proba, multi_class='ovo', average='macro')


# --- Predict User Input ---
st.subheader("🔮 Predict Sleep Event")
anglez = st.number_input("Enter anglez:", value=27.88, format="%.4f")
enmo = st.number_input("Enter enmo:", value=0.00, format="%.4f")

if st.button("Predict Sleep Event"):
    # BUGFIX: the scaler was fit on a named-column DataFrame, so transform
    # the user input as a DataFrame with the same columns (a bare ndarray
    # triggers sklearn's feature-name mismatch warning).
    input_data = pd.DataFrame([[anglez, enmo]], columns=['anglez', 'enmo'])
    input_scaled = scaler.transform(input_data)
    prediction = model.predict(input_scaled)[0]
    predicted_label = le_event.inverse_transform([prediction])[0]
    st.success(f"Predicted Sleep Event: {predicted_label}")


# Display metrics
st.subheader("📊 Model Performance")
metrics_df = pd.DataFrame({
    "Metric": ["Accuracy", "F1 Score", "ROC AUC Score"],
    "Value": [f"{accuracy:.4f}", f"{f1:.4f}", f"{roc:.4f}"]
})
st.table(metrics_df)

st.subheader("📈 Event Value Counts")
value_counts_df = merged_df["event"].value_counts().reset_index()
value_counts_df.columns = ["Event", "Count"]
st.dataframe(value_counts_df)
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy
|
| 2 |
+
pandas
|
| 3 |
+
joblib
|
| 4 |
+
streamlit
|
| 5 |
+
scikit-learn
|
| 6 |
+
xgboost
|
| 7 |
+
statsmodels
|
| 8 |
+
pyarrow
|
| 9 |
+
# removed "io": it is a Python built-in module, not a pip-installable package (pip install fails on it)
|
| 10 |
+
seaborn
|
| 11 |
+
matplotlib
|
sample_submission.csv
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
row_id,series_id,step,event,score
|
| 2 |
+
0,038441c925bb,100,onset,0.0
|
| 3 |
+
1,038441c925bb,105,wakeup,0.0
|
| 4 |
+
2,03d92c9f6f8a,80,onset,0.5
|
| 5 |
+
3,03d92c9f6f8a,110,wakeup,0.5
|
| 6 |
+
4,0402a003dae9,90,onset,1.0
|
| 7 |
+
5,0402a003dae9,120,wakeup,1.0
|
test_series.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2871bd99347c2eb2bae104e81bd04421108277e3f095f51caabb5e4e1f77ecf9
|
| 3 |
+
size 4585
|
train_events.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
train_series.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd11f8c3af0507757cade38419f5ec34af28dd6e3057ab17ef05b0af717106af
|
| 3 |
+
size 985815313
|