# Spaces:
# Build error
# Build error
import numpy as np
import pandas as pd
import streamlit as st
from imblearn.over_sampling import SMOTENC
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
from xgboost import XGBClassifier
def generate_synthetic_data(n_samples: int = 1000, seed: int = 42) -> pd.DataFrame:
    """Generate a reproducible synthetic dataset of patient-style records.

    Every column is drawn independently of the others, so the features carry
    no real signal about ``discharge_destination``.

    Args:
        n_samples: Number of rows to generate. Defaults to 1000.
        seed: Seed for the private random generator. Defaults to 42, which
            reproduces the data of the original hard-coded implementation.

    Returns:
        A DataFrame with ``n_samples`` rows and the columns ``age``,
        ``gender``, ``mechanical_ventilation``, ``dialysis``, ``gcs``,
        ``sodium``, ``heart_rate``, ``creatinine`` and
        ``discharge_destination``.
    """
    # A private RandomState produces the same stream as np.random.seed(seed)
    # followed by the module-level np.random functions, but does not reseed
    # NumPy's global generator as a side effect of calling this function.
    rng = np.random.RandomState(seed)

    # The draws below must stay in this order: reordering them would change
    # which random numbers each column receives.
    return pd.DataFrame({
        "age": rng.randint(18, 90, n_samples),
        "gender": rng.choice(["Male", "Female"], n_samples),
        "mechanical_ventilation": rng.choice([0, 1], n_samples),
        "dialysis": rng.choice([0, 1], n_samples),
        # NOTE(review): randint excludes its upper bound, so gcs is 3..14.
        # If this is meant to be a Glasgow Coma Scale score (3-15), the bound
        # should be 16 - confirm first, since it changes the generated data.
        "gcs": rng.randint(3, 15, n_samples),
        "sodium": rng.uniform(135, 145, n_samples),
        "heart_rate": rng.randint(60, 120, n_samples),
        "creatinine": rng.uniform(0.5, 3.5, n_samples),
        "discharge_destination": rng.choice(
            ["Home", "Nursing Facility", "Rehabilitation", "Death"],
            n_samples,
            p=[0.6, 0.2, 0.1, 0.1],
        ),
    })
# Streamlit app body: preview the synthetic data, let the user choose
# predictors and a model, then train on an oversampled training split and
# report metrics on the untouched test split.

# Title
st.title("Discharge Destination Prediction")

# Generate and display synthetic dataset
data = generate_synthetic_data()
st.write("Synthetic Dataset Preview:")
st.dataframe(data.head())

# Select predictors and target.
# The target column is removed from the predictor options: offering it as a
# feature would leak the label into the model.
target_options = ["discharge_destination"]
feature_options = [col for col in data.columns if col not in target_options]
predictors = st.multiselect(
    "Select predictor features", feature_options, default=feature_options
)
target = st.selectbox("Select target column", target_options)

# Nothing downstream works without features, so stop this run cleanly instead
# of failing later with a traceback.
if not predictors:
    st.warning("Select at least one predictor feature to continue.")
    st.stop()

# Split data.
# Class labels are integer-encoded because recent XGBoost releases reject
# string class labels; the original names are kept for the report.
X = data[predictors]
label_encoder = LabelEncoder()
y = pd.Series(
    label_encoder.fit_transform(data[target]), index=data.index, name=target
)
class_names = [str(name) for name in label_encoder.classes_]
class_ids = np.arange(len(class_names))
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Preprocessing.
# Splitting on "number" / "not number" assigns every selected column to exactly
# one transformer. Listing int64/float64 explicitly would silently drop integer
# columns stored as int32, which np.random.randint can produce on some
# platforms.
numerical_features = X.select_dtypes(include="number").columns.tolist()
categorical_features = X.select_dtypes(exclude="number").columns.tolist()
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), numerical_features),
    ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
])

# Select model
model_name = st.selectbox("Select a model", ["Random Forest", "XGBoost"])
if model_name == "Random Forest":
    model = RandomForestClassifier(n_estimators=100, random_state=42)
else:
    model = XGBClassifier(
        use_label_encoder=False, eval_metric="mlogloss", random_state=42
    )

# Build pipeline
pipeline = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", model),
])

# Train and evaluate
if st.button("Train Model"):
    # Oversampling runs only when training is requested (Streamlit re-executes
    # the whole script on every widget interaction) and only on the training
    # split, so the test split keeps its original class distribution.
    if categorical_features and numerical_features:
        # NOTE(review): the 0/1 flag columns are numeric here, so SMOTENC
        # treats them as continuous - confirm this is intended.
        smote_nc = SMOTENC(
            categorical_features=[
                X.columns.get_loc(col) for col in categorical_features
            ],
            random_state=42,
        )
        X_train_resampled, y_train_resampled = smote_nc.fit_resample(
            X_train, y_train
        )
    else:
        # SMOTENC raises unless the data mixes categorical and numerical
        # columns, so fall back to the unresampled training split.
        st.warning(
            "SMOTENC needs at least one categorical and one numerical "
            "predictor; training on the original (imbalanced) data instead."
        )
        X_train_resampled, y_train_resampled = X_train, y_train

    pipeline.fit(X_train_resampled, y_train_resampled)
    y_pred = pipeline.predict(X_test)
    y_proba = pipeline.predict_proba(X_test)

    # Display results
    st.write("Classification Report:")
    st.text(
        classification_report(
            y_test, y_pred, labels=class_ids, target_names=class_names
        )
    )
    # ROC AUC is a ranking metric, so it is computed from class probabilities.
    # One-hot encoding the hard predictions would discard the ranking and fail
    # with a shape mismatch whenever a class is never predicted.
    roc_auc = roc_auc_score(y_test, y_proba, multi_class="ovr", labels=class_ids)
    st.write(f"ROC AUC Score: {roc_auc:.4f}")