# Repository page header captured together with the file (not Python code):
#   mlpredictor / app.py
#   omarmalik347's picture
#   Create app.py
#   65a3718 verified
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
class MachineLearningApp:
    """Streamlit application for training and querying a scikit-learn model.

    The user uploads a CSV/Excel file, picks feature columns, a target column
    and a model in the sidebar, trains the model, and can then type feature
    values to obtain a prediction. Everything that has to survive a Streamlit
    rerun (fitted model, fitted scaler, label encoder, evaluation results and
    the feature list used for training) is kept in ``st.session_state``.
    """

    def __init__(self):
        """Configure the page and make sure every session key exists."""
        st.set_page_config(
            page_title="ML Model Selection App",
            page_icon=":robot_face:",
            layout="wide",
        )
        self.initialize_session_state()

    def initialize_session_state(self):
        """Initialize all session state variables that are not set yet."""
        # Built per call so the mutable ``[]`` default is never shared
        # between sessions.
        initial_states = {
            'data': None,
            'X': None,
            'y': None,
            'model': None,
            'scaler': None,
            'label_encoder': None,
            'problem_type': None,
            'test_size': 0.2,
            'selected_features': [],
            'target_column': None,
            'selected_model': None,
            'model_results': None,
        }
        for key, value in initial_states.items():
            if key not in st.session_state:
                st.session_state[key] = value

    @staticmethod
    def _infer_problem_type(y):
        """Return 'regression' for numeric targets, else 'classification'.

        Boolean targets count as classification. Text, categorical and any
        other non-numeric dtype are classification as well, so the result
        does not depend on text being stored with the ``object`` dtype.
        """
        is_bool = pd.api.types.is_bool_dtype(y)
        is_numeric = pd.api.types.is_numeric_dtype(y)
        if is_bool or not is_numeric:
            return 'classification'
        return 'regression'

    @staticmethod
    def _non_numeric_columns(X):
        """Return the names of the columns of ``X`` that are not numeric."""
        return [
            column
            for column, dtype in X.dtypes.items()
            if not pd.api.types.is_numeric_dtype(dtype)
        ]

    def sidebar_data_upload(self):
        """Render the upload widget in the sidebar.

        Returns:
            The uploaded file object, or ``None`` when nothing is uploaded.
        """
        with st.sidebar:
            st.header("📊 Data Upload")
            uploaded_file = st.file_uploader(
                "Choose a CSV or Excel file",
                type=['csv', 'xlsx', 'xls'],
            )
        return uploaded_file

    def sidebar_feature_selection(self, df):
        """Render feature, target and test-size widgets in the sidebar.

        Args:
            df: The loaded dataset, or ``None``.

        Returns:
            ``(selected_features, target_column, test_size)``; all three are
            ``None`` when ``df`` is ``None``. The target column is never part
            of the returned feature list.
        """
        with st.sidebar:
            st.header("🔍 Feature Selection")
            if df is None:
                st.warning("Please upload a dataset first.")
                return None, None, None

            # 'number' also matches int32/float32 etc., not only 64-bit types.
            numeric_cols = df.select_dtypes(include='number').columns.tolist()
            all_columns = list(df.columns)

            selected_features = st.multiselect(
                "Select Features",
                options=all_columns,
                default=numeric_cols,
            )
            target_column = st.selectbox(
                "Select Target Column",
                options=all_columns,
            )
            test_size = st.slider(
                "Test Set Percentage",
                min_value=0.1,
                max_value=0.5,
                value=0.2,
                step=0.05,
                help="Percentage of data to use for testing",
            )

            # Training on the target itself would leak the answer; with the
            # widget defaults this happens whenever the first column is
            # numeric, so drop it and tell the user.
            if target_column in selected_features:
                selected_features = [
                    column for column in selected_features
                    if column != target_column
                ]
                st.info(
                    f"'{target_column}' is the target column and was "
                    "removed from the features."
                )
            if not selected_features:
                st.warning("Please select at least one feature.")
        return selected_features, target_column, test_size

    def sidebar_model_selection(self, problem_type):
        """Render the model picker for the given problem type.

        Args:
            problem_type: ``'classification'`` selects the classifiers; any
                other value selects the regressors.

        Returns:
            ``(models, selected_model)``: a dict mapping display names to
            fresh, unfitted estimators, and the chosen display name.
        """
        with st.sidebar:
            st.header("🤖 Model Selection")
            if problem_type == 'classification':
                models = {
                    'Logistic Regression': LogisticRegression(),
                    'Decision Tree': DecisionTreeClassifier(),
                    'Random Forest': RandomForestClassifier(),
                    'SVM': SVC(),
                    'Naive Bayes (Gaussian)': GaussianNB(),
                    'Naive Bayes (Multinomial)': MultinomialNB(),
                    'K-Nearest Neighbors': KNeighborsClassifier(),
                    'Neural Network': MLPClassifier(max_iter=1000),
                }
            else:
                models = {
                    'Linear Regression': LinearRegression(),
                    'Decision Tree': DecisionTreeRegressor(),
                    'Random Forest': RandomForestRegressor(),
                    'SVR': SVR(),
                    'K-Nearest Neighbors': KNeighborsRegressor(),
                    'Neural Network': MLPRegressor(max_iter=1000),
                }
            selected_model = st.selectbox(
                "Choose a Model",
                options=list(models.keys()),
            )
        return models, selected_model

    def sidebar_prediction_input(self, selected_features):
        """Render one numeric input per feature plus a "Predict" button.

        Args:
            selected_features: Feature names to ask values for.

        Returns:
            A ``{feature: value}`` dict in the run where "Predict" was
            clicked; ``None`` otherwise or when no model is trained yet.
        """
        with st.sidebar:
            st.header("🔮 Prediction Input")
            if st.session_state.model is None:
                st.warning("Please train a model first.")
                return None

            prediction_inputs = {
                feature: st.number_input(
                    f"Enter {feature}",
                    value=0.0,
                    step=0.1,
                )
                for feature in selected_features
            }
            if st.button("Predict"):
                return prediction_inputs
        return None

    def load_and_display_data(self, uploaded_file):
        """Load the uploaded file and show a preview and basic information.

        Args:
            uploaded_file: File object from the uploader, or ``None``.

        Returns:
            The loaded ``DataFrame``, or ``None`` when nothing was uploaded
            or the file could not be parsed (an error is shown in that case).
        """
        if uploaded_file is None:
            return None

        try:
            # Rewind in case the buffer was already consumed on a previous
            # rerun of the script.
            uploaded_file.seek(0)
            # Compare case-insensitively so "DATA.CSV" is not sent to the
            # Excel reader.
            if uploaded_file.name.lower().endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)
        except Exception as e:  # UI boundary: report any parser failure
            st.error(f"Error loading file: {e}")
            return None

        st.session_state.data = df

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("📋 Dataset Preview")
            st.dataframe(df.head())
        with col2:
            st.subheader("📊 Dataset Information")
            st.write(f"Total Rows: {df.shape[0]}")
            st.write(f"Total Columns: {df.shape[1]}")
            st.write("Column Types:")
            for dtype, count in df.dtypes.value_counts().items():
                st.text(f"{dtype}: {count} columns")
        return df

    def _render_results(self, results):
        """Display the evaluation results stored by the last training run."""
        st.header("🔬 Model Training Results")
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("📊 Model Performance")
            if results['problem_type'] == 'classification':
                st.metric("Accuracy", f"{results['accuracy']:.2%}")
                st.subheader("Classification Report")
                for key, value in results['report'].items():
                    # Scalar entries (overall accuracy) are shown above.
                    if isinstance(value, dict):
                        st.text(f"{key}:")
                        for metric, score in value.items():
                            st.text(f" {metric}: {score:.2f}")
            else:
                st.metric("Mean Squared Error", f"{results['mse']:.4f}")
                st.metric("Mean Absolute Error", f"{results['mae']:.4f}")
                st.metric("R² Score", f"{results['r2']:.4f}")
        with col2:
            st.subheader("📈 Model Details")
            st.write(f"Selected Model: {results['model_name']}")
            st.write(f"Problem Type: {results['problem_type']}")
            st.write(f"Test Set Size: {results['test_size']:.0%}")
            st.write(f"Features Used: {', '.join(results['features'])}")
            st.write(f"Target Column: {results['target']}")

    def train_and_evaluate_model(self, X, y, test_size, models, selected_model_name):
        """Train the selected model, evaluate it and show the results.

        The scaler is fitted on the training split only and that same fitted
        scaler is stored for later predictions. On success the model, scaler,
        label encoder, feature list and results are written to
        ``st.session_state``; on failure an error is shown and nothing is
        stored.

        Args:
            X: Feature ``DataFrame``; all columns must be numeric.
            y: Target ``Series``.
            test_size: Fraction of rows held out for evaluation.
            models: Mapping of display name to unfitted estimator.
            selected_model_name: Key into ``models``.
        """
        non_numeric = self._non_numeric_columns(X)
        if non_numeric:
            st.error(
                "Selected features must be numeric. Non-numeric columns: "
                + ", ".join(map(str, non_numeric))
            )
            return

        # Rows with a missing feature or target value cannot be used for
        # fitting, so remove them up front and report how many were lost.
        complete = X.notna().all(axis=1).to_numpy() & y.notna().to_numpy()
        dropped = int((~complete).sum())
        if dropped:
            st.warning(
                f"Dropped {dropped} rows with missing values before training."
            )
            X = X[complete]
            y = y[complete]

        problem_type = self._infer_problem_type(y)
        label_encoder = None
        if problem_type == 'classification':
            label_encoder = LabelEncoder()
            y_encoded = label_encoder.fit_transform(y)
        else:
            y_encoded = y

        model = models[selected_model_name]
        # MultinomialNB rejects negative inputs, which standardization always
        # produces, so that model gets a [0, 1] scaling instead.
        if isinstance(model, MultinomialNB):
            scaler = MinMaxScaler()
        else:
            scaler = StandardScaler()

        try:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y_encoded, test_size=test_size, random_state=42
            )
            # Fit the scaler on the training rows only so no statistics of
            # the held-out rows leak into training.
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
        except ValueError as e:
            st.error(f"Model training failed: {e}")
            return

        results = {
            'problem_type': problem_type,
            'model_name': selected_model_name,
            'test_size': test_size,
            'features': [str(column) for column in X.columns],
            'target': y.name,
        }
        if problem_type == 'classification':
            class_names = [str(name) for name in label_encoder.classes_]
            results['accuracy'] = accuracy_score(y_test, y_pred)
            # Passing ``labels`` keeps the report valid when a class is
            # missing from the test split.
            results['report'] = classification_report(
                y_test,
                y_pred,
                labels=np.arange(len(class_names)),
                target_names=class_names,
                output_dict=True,
                zero_division=0,
            )
        else:
            results['mse'] = mean_squared_error(y_test, y_pred)
            results['mae'] = mean_absolute_error(y_test, y_pred)
            results['r2'] = r2_score(y_test, y_pred)

        st.session_state.model = model
        st.session_state.scaler = scaler
        st.session_state.label_encoder = label_encoder
        st.session_state.problem_type = problem_type
        st.session_state.X = X
        st.session_state.y = y
        # Remember what the model was trained on so predictions keep using
        # the same columns even if the sidebar selection changes afterwards.
        st.session_state.selected_features = list(X.columns)
        st.session_state.target_column = y.name
        st.session_state.test_size = test_size
        st.session_state.selected_model = selected_model_name
        st.session_state.model_results = results

        self._render_results(results)

    def make_prediction(self, prediction_inputs):
        """Predict the target for one row of user-entered feature values.

        Args:
            prediction_inputs: ``{feature: value}`` dict covering every
                feature the stored model was trained on.
        """
        model = st.session_state.model
        scaler = st.session_state.scaler
        if model is None or scaler is None:
            st.error("Please train a model first.")
            return

        # Use the training-time column order; fall back to the given keys
        # when no feature list was stored.
        features = st.session_state.selected_features or list(prediction_inputs)
        missing = [f for f in features if f not in prediction_inputs]
        if missing:
            st.error(
                "Missing values for features: " + ", ".join(map(str, missing))
            )
            return

        input_df = pd.DataFrame(
            [[prediction_inputs[f] for f in features]],
            columns=features,
        )
        input_scaled = scaler.transform(input_df)
        prediction = model.predict(input_scaled)

        label_encoder = st.session_state.label_encoder
        if label_encoder is not None:
            prediction = label_encoder.inverse_transform(prediction)

        st.header("🎯 Prediction Result")
        st.subheader("Input Data")
        st.dataframe(input_df)
        st.subheader("Predicted Value")
        st.write(prediction[0])

    def run(self):
        """Main application flow, executed on every Streamlit rerun."""
        uploaded_file = self.sidebar_data_upload()
        st.title("🚀 Predict on Custom Data using any ML Model")

        df = self.load_and_display_data(uploaded_file)
        if df is None:
            return

        selected_features, target_column, test_size = (
            self.sidebar_feature_selection(df)
        )
        # ``is None`` rather than truthiness: a column may be named 0.
        if not selected_features or target_column is None:
            return

        X = df[selected_features]
        y = df[target_column]
        problem_type = self._infer_problem_type(y)
        models, selected_model = self.sidebar_model_selection(problem_type)

        with st.sidebar:
            train_clicked = st.button("Train Model", type="primary")

        if train_clicked:
            # Clear the previous fit so a failed training run cannot leave a
            # stale model behind.
            for key in ('model', 'scaler', 'label_encoder',
                        'problem_type', 'model_results'):
                st.session_state[key] = None
            self.train_and_evaluate_model(
                X, y, test_size, models, selected_model
            )
        elif st.session_state.model_results is not None:
            # The button is only True in the run where it was clicked, so
            # re-render the stored results on every later rerun.
            self._render_results(st.session_state.model_results)

        trained_features = st.session_state.selected_features or selected_features
        prediction_inputs = self.sidebar_prediction_input(trained_features)
        if prediction_inputs:
            self.make_prediction(prediction_inputs)
if __name__ == "__main__":
    # Script entry point: constructing the app configures the page and the
    # session state; run() then drives the main flow.
    MachineLearningApp().run()