Spaces:

Clone77
/

Rain_prediction

Sleeping

File size: 18,871 Bytes

import streamlit as st
import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import math
from PIL import Image

from sklearn.model_selection import train_test_split,cross_validate
from sklearn.preprocessing import RobustScaler, OneHotEncoder,PowerTransformer,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error,r2_score

from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import SGDRegressor,RidgeCV,LassoCV
from sklearn.preprocessing import PolynomialFeatures,FunctionTransformer
from sklearn.ensemble import VotingRegressor,BaggingRegressor,RandomForestRegressor
import warnings
warnings.filterwarnings('ignore')

# Streamlit re-executes this script from the top on every user interaction,
# so an uncached read would re-parse the whole CSV on each button click.
# show_spinner=False keeps the cached call from rendering anything before
# st.set_page_config() runs further down.
@st.cache_data(show_spinner=False)
def _load_weather_data(csv_path="weatherAUS.csv"):
    """Read the weather observations CSV into a DataFrame.

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        The parsed DataFrame, as produced by ``pd.read_csv``.
    """
    return pd.read_csv(csv_path)


data = _load_weather_data()
# Pages work on `df` (some add columns or drop rows); `data` stays untouched.
df = data.copy()







# Set page configuration (tab title, icon and centered layout)
st.set_page_config(page_title="ML Pipeline", page_icon="⚡", layout="centered")

# Inject the global stylesheet: app background, image sizing and the
# .title / .subtitle / .content helper classes.
# The <style> element is explicitly closed so the injected HTML is well-formed.
st.markdown(
    """
    <style>
        /* Set background color */
        .stApp {
            background-color: #015551;
            color: white;
        }
        /* Adjust image size */
        .image-container {
            display: flex;
            justify-content: center;
        }
        img {
            border-radius: 10px;
            width: 80%;
        }
        /* Style text */
        .title {
            text-align: center;
            font-size: 28px;
            font-weight: bold;
        }
        .subtitle {
            text-align: center;
            font-size: 22px;
            font-weight: bold;
        }
        .content {
            text-align: justify;
            margin-left: auto;
            margin-right: auto;
            max-width: 80%;
        }
    </style>
    """,
    unsafe_allow_html=True
)







# First run of a session: start on the pipeline overview ("main") page
if "page" not in st.session_state:
    st.session_state["page"] = "main"

# Function to navigate between pages
def navigate_to(page_name):
    """Make ``page_name`` the active page and redraw the app immediately.

    The page dispatcher reads ``st.session_state.page`` once per script run,
    before any button handler executes. Storing the new value alone would
    leave the old page on screen until the next interaction, so a rerun is
    requested right after the state change.

    Args:
        page_name: Key of the page to show (e.g. ``"main"``, ``"simple_eda"``).
    """
    st.session_state.page = page_name
    # Stops the current run and restarts the script with the new page selected.
    st.rerun()

# Pipeline overview: one button per pipeline stage, laid out in a 4-column grid
if st.session_state.page == "main":
    st.markdown("<h1 style='text-align: center; color: #4CAF50;'>Machine Learning Pipeline</h1>", unsafe_allow_html=True)

    # Short usage hint shown under the heading
    st.write("Click on any step to view details.")

    # Stage labels, in display order
    steps = [
        "Problem Statement", "Data Collection", "Simple EDA", "Data Preprocessing",
        "Advanced EDA", "Model Building", "Model Testing", "Model Deployment"
    ]

    # Four columns; the eight labels wrap around into two rows
    cols = st.columns(4)

    for position, label in enumerate(steps):
        # Page keys are the labels lower-cased with underscores for spaces
        target_page = label.replace(" ", "_").lower()
        with cols[position % 4]:
            clicked = st.button(label, key=label)
            if clicked:
                navigate_to(target_page)






elif st.session_state.page == 'problem_statement':

    col1, col2 = st.columns([1, 5])
    with col1:
        st.image("https://cdn-icons-png.flaticon.com/512/1146/1146869.png", width=100, caption="")

    with col2:
        st.markdown(
            "<h1 style='color: #38B6FF; padding-top: 20px;'>Rain Prediction Problem Statement</h1>",
            unsafe_allow_html=True
        )

    # 📌 Context (inside expander)
    with st.expander("📌 What’s the Problem?"):
        st.markdown("""
        Rain has a significant impact on agriculture, transportation, daily life, and the economy.
        Yet predicting whether it will rain **tomorrow** remains a challenge.

        Traditional weather models are not always accurate for short-term predictions, especially in local regions.

        Our goal is to use **machine learning** to predict rainfall using today’s observed weather features.
        """)

    # 🎯 Objective
    st.markdown("### 🎯 Our Goal")
    st.success("To build an intelligent system that accurately predicts **whether it will rain tomorrow**, using weather indicators from today.")

    # 🧠 Dataset Summary in 2-column layout
    st.markdown("### 📂 Dataset Highlights")
    col1, col2 = st.columns(2)

    with col1:
        st.metric(label="🌡️ Temperature Fields", value="4 types")
        st.metric(label="💧 Humidity & Rainfall", value="3 features")
        st.metric(label="☁️ Cloud & Sunshine", value="3 features")

    with col2:
        st.metric(label="🌬️ Wind Features", value="4 values")
        st.metric(label="🧭 Pressure", value="2 features")
        st.metric(label="🟰 Target", value="RainTomorrow")

    # Interactive card for impact
    st.markdown("### 🌍 Real-World Impact")

    with st.container():
        col1, col2 = st.columns(2)
        with col1:
            st.info("👨‍🌾 **Farmers** can make informed decisions on irrigation and harvest.")
            st.info("🚗 **Commuters** can plan travel during uncertain weather.")
        with col2:
            st.info("📦 **Logistics** can prepare for potential rain disruptions.")
            st.info("🏛️ **Government bodies** can alert regions vulnerable to floods.")

    # Optional animation or visual
    #st.image("https://cdn-icons-png.flaticon.com/512/1146/1146869.png", width=100, caption="Smart Weather Forecasting")

    # CTA
    st.markdown("---")
    st.markdown("#### ✅ Ready to try the prediction?")
    st.markdown("Click below to head to the app and test it in real time!")

    if st.button("🔮 Go to Rain Predictor"):
        st.switch_page("Model.py")

    st.write("---")
    st.write("### 🔍 **What’s Next?**")
    st.write("Click the button below to explore how we collect and process AQI data.")
    if st.button("➡️ Go to Data Collection"):
        st.session_state.page = "data_collection"
    if st.button("➡️ Go to Pipeline"):
        navigate_to("main")











# **Only execute the content when the selected page is "data_collection"**
elif st.session_state.page == "data_collection":


# Header: Icon + Title
    col1, col2 = st.columns([1, 5])
    with col1:
        st.image("https://cdn-icons-png.flaticon.com/512/2460/2460591.png", width=80)
    with col2:
        st.markdown("<h1 style='color: #00B4D8;'>📊 Data Collection</h1>", unsafe_allow_html=True)
        st.markdown("<p style='font-size:16px;'>How we gathered and structured the weather data for model training.</p>", unsafe_allow_html=True)

    st.markdown("---")

    # Section 1: Data Source
    st.markdown("### 🌐 Data Sources")
    st.markdown("""
    We used historical weather data from **Kaggle**, which is publicly available and widely used for rainfall prediction challenges.

    ✅ Open-source  
    ✅ Includes daily weather observations  
    ✅ Covers multiple cities and years
    """)

    # Section 2: Features Overview
    with st.expander("🔍 View Collected Features"):
        st.markdown("""
        - `Date`
        - `Location`
        - `MinTemp`, `MaxTemp`, `Temp9am`, `Temp3pm`
        - `Rainfall`, `Evaporation`, `Sunshine`
        - `WindGustDir`, `WindDir9am`, `WindDir3pm`
        - `WindGustSpeed`, `WindSpeed9am`, `WindSpeed3pm`
        - `Humidity9am`, `Humidity3pm`
        - `Pressure9am`, `Pressure3pm`
        - `Cloud9am`, `Cloud3pm`
        - `RainToday` (Yes/No) → 🧠 Used to predict `RainTomorrow`
        """)

    # Section 3: Visual Timeline of Collection
    st.markdown("### 🕒 Collection Timeline & Scope")
    col1, col2 = st.columns(2)

    with col1:
        st.success("📍 Locations: 49 Australian cities")
        st.info("📆 Date Range: 2007 - 2017")
        st.warning("🔴 Missing values handled before training")

    with col2:
        st.image("https://cdn-icons-png.flaticon.com/512/3222/3222800.png", width=150)

    # CTA
    st.markdown("---")
    

    # Footer
    st.markdown("<hr style='border: 0.5px solid gray;'>", unsafe_allow_html=True)
    st.markdown("<p style='text-align: center; color: black;'>• Rain Prediction App •", unsafe_allow_html=True)


    # Call-to-Action
    if st.button("➡️ Go to have a look on Quality of data"):
        navigate_to("simple_eda")
    if st.button("➡️ Go to Pipeline"):
        navigate_to("main")

# **Other Pages Should Not Display Data Collection Content**
elif st.session_state.page == "simple_eda":
    
    with st.expander("📄 Preview Dataset"):
        st.dataframe(df.head())

    # Overview
    st.markdown("### 🧾 Dataset Summary")

    col1, col2 = st.columns(2)

    with col1:
        st.write("**Shape:**", df.shape)
        st.write("**Columns:**", df.columns.tolist())
        st.dataframe(df.dtypes)

    with col2:
        st.write("**Missing Values (%):**")
        st.dataframe((df.isnull().mean() * 100).round(2))

    st.markdown("#### ✅ Next Step: Ready to clean and prepare the data?")
    if st.button("🧹 Go to Data Cleaning"):
        navigate_to("data_preprocessing")
    if st.button("➡️ Go to Pipeline"):
        navigate_to("main")












elif st.session_state.page == "data_preprocessing":
   
    col1, col2 = st.columns([1, 5])
    with col1:
        st.image("https://cdn-icons-png.flaticon.com/512/3242/3242257.png", width=80)
    with col2:
        st.markdown("<h1 style='color: #00C897;'>🧹 Data Cleaning</h1>", unsafe_allow_html=True)
        st.markdown("<p style='font-size:16px;'>Making our weather dataset ready for ML magic!</p>", unsafe_allow_html=True)

    st.markdown("---")

# Step 1: Describe cleaning workflow
    st.markdown("### 🧼 Cleaning Workflow")
    st.write("🛠️ Step-by-step Cleaning Process")
    st.markdown("""
    1. **Missing Value Handling**  
    - Dropped rows/columns with excessive missing values  
    - Used mean/median imputation for numeric columns  
    - Used mode or 'Unknown' for categorical columns

    2. **Categorical Encoding**  
    - One-hot encoded wind directions (`WindGustDir`, `WindDir9am`, `WindDir3pm`)  
    - Binary encoding for `RainToday`

    3. **Scaling**  
    - Used `RobustScaler` to reduce the impact of outliers  
    - Applied scaling only to numeric columns

    4. **Feature Selection**  
    - Removed unimportant columns (`Date`, `Location`)  
    - Ensured feature-target split
    """)

    # Step 2: Sample before-after view
    st.markdown("### 🧾 Sample Data Before & After Cleaning")

    col1, col2 = st.columns(2)

    with col1:
        st.markdown("#### 🟥 Raw Data")
        raw_data = {
            'MinTemp': [14.1, None],
            'MaxTemp': [26.5, 24.3],
            'Rainfall': [0.0, 1.2],
            'WindGustDir': ['W', None],
            'RainToday': ['No', 'Yes']
        }
        st.dataframe(pd.DataFrame(raw_data))

    with col2:
        st.markdown("#### 🟩 Cleaned Data")
        clean_data = {
            'MinTemp': [14.1, 14.1],
            'MaxTemp': [26.5, 24.3],
            'Rainfall': [0.0, 1.2],
            'WindGustDir_W': [1, 0],
            'RainToday': [0, 1]
        }
        st.dataframe(pd.DataFrame(clean_data))

    

    

    # Footer
    st.markdown("<p style='text-align: center; color: gray;'> • Clean Data = Good Model • ", unsafe_allow_html=True)
    st.markdown("<hr style='border: 0.5px solid gray;'>", unsafe_allow_html=True)

    st.markdown("#### ✅ Data cleaned and ready! Move on to EDA?")

    st.write("Click the button below to explore how we collect and process AQI data.")
    if st.button("➡️ EDA"):
        st.session_state.page = "advanced_eda"
    if st.button("➡️ Go to Pipeline"):
        navigate_to("main")


elif st.session_state.page == "advanced_eda":
    # Preview

    # Plot 1: RainTomorrow distribution
    

    # Plot 2: MinTemp vs Rainfall
    st.markdown("### 🌡️ Min Temperature vs Rainfall")
    fig2 = px.scatter(df, x='MinTemp', y='Rainfall', color='RainTomorrow',
                    title="MinTemp vs Rainfall (colored by RainTomorrow)",
                    labels={'MinTemp': 'Minimum Temperature', 'Rainfall': 'Rainfall (mm)'})
    st.plotly_chart(fig2, use_container_width=True)

    # Plot 3: Correlation Heatmap (numeric only)
    st.markdown("### 📊 Correlation Heatmap (Numeric Features)")
    numeric_df = df.select_dtypes(include='number').copy()
    correlation = numeric_df.corr().round(2).reset_index().melt(id_vars='index')
    correlation.columns = ['Feature1', 'Feature2', 'Correlation']

    fig3 = px.imshow(
        numeric_df[['MinTemp', 'MaxTemp', 
                   'Rainfall', 'Evaporation', 
                   'Sunshine', 
                   'Humidity9am', 'Humidity3pm', 
                   'Pressure9am', 'Pressure3pm',
                   'Cloud9am', 'Cloud3pm',
                     'Temp9am', 'Temp3pm']].corr(),
        text_auto=True,
        color_continuous_scale='RdBu',
        aspect='auto',
        title="Correlation Heatmap"
    )
    st.plotly_chart(fig3, use_container_width=True)

    # Plot 4: Monthly Rainfall Trend (if Date exists)
    if 'Date' in df.columns and 'Rainfall' in df.columns:
        st.markdown("### 📆 Average Monthly Rainfall Trend")
        df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
        df.dropna(subset=['Date'], inplace=True)
        df['Month'] = df['Date'].dt.month
        rain_by_month = df.groupby('Month')['Rainfall'].mean().reset_index()

        fig4 = px.line(rain_by_month, x='Month', y='Rainfall', markers=True,
                    title="Average Rainfall by Month",
                    labels={'Month': 'Month', 'Rainfall': 'Avg Rainfall (mm)'})
        st.plotly_chart(fig4, use_container_width=True)

    # Plot 5: Optional interactive feature selection
    st.markdown("### 🧠 Custom Feature Comparison")

    x_col = st.selectbox("📌 Select X-axis", options=df.select_dtypes(include='number').columns)
    y_col = st.selectbox("📌 Select Y-axis", options=df.select_dtypes(include='number').columns, index=1)

    fig5 = px.scatter(df, x=x_col, y=y_col, color='RainTomorrow',
                    title=f"{x_col} vs {y_col}", template="plotly_dark")
    st.plotly_chart(fig5, use_container_width=True)

# CTA
















    st.write("---")
    st.write("### 🔍 **What's Next?**")
    st.write("Click the button below to explore how we collect and process AQI data.")
    if st.button("➡️ Go to Model Building"):
        st.session_state.page = "model_building"
    if st.button("➡️ Go to Pipeline"):
        navigate_to("main")

elif st.session_state.page == "model_building":

   
    st.markdown("<h1 style='color:#5C33F6;'>🤖 Model Building Summary</h1>", unsafe_allow_html=True)
    st.markdown("<p style='font-size:16px;'>Overview of classification models and performance evaluation</p>", unsafe_allow_html=True)

    st.markdown("---")

    # Description
    st.markdown("### 🔧 Algorithms Used")   
    st.markdown("""
    We explored multiple classification algorithms to predict whether it will rain tomorrow:

    - **K-Nearest Neighbors (KNN)**
    - **Decision Tree Classifier**
    - **Logistic Regression**

    Each model was tuned using **Optuna**, a hyperparameter optimization library that efficiently searches the best combination of parameters.

    The best version of each model was then used in three ensemble techniques:
    - 🗳️ **Voting Classifier**
    - 🎯 **Bagging Classifier**
    - 🌲 **Random Forest Classifier**
    """)

    # Performance Table
    st.markdown("### 🌟 Ensemble Model Performance (Classification Metrics)")

    performance_data = {
        "Model": ["Voting Classifier", "Bagging Classifier", "Random Forest Classifier"],
        "Accuracy": [0.67, 0.85, 0.84],
        "Precision": [0.78, 0.75, 0.8],
        "Recall": [0.70, 0.74, 0.87],
        "F1 Score": [0.75, 0.78, 0.82]
    }
    df = pd.DataFrame(performance_data)
    

 

    st.table(df)


    # Visual Comparison
    



    # Display as a table
    if st.button("➡️ Go to Model_testing"):
        st.session_state.page = "model_testing"
    

    if st.button("➡️ Go to Pipeline"):
        navigate_to("main")









elif st.session_state.page == "model_testing":
    # Title

    st.markdown("<h1 style='color:#EF476F;'>🧪 Model Testing Summary</h1>", unsafe_allow_html=True)
    st.markdown("<p style='font-size:16px;'>Final model evaluation on unseen test data</p>", unsafe_allow_html=True)

    st.markdown("---")

    # Testing Info
    st.markdown("### 🧾 Testing Overview")
    st.markdown("""
    After hyperparameter tuning and model selection, the best-performing model (**Random Forest Classifier**) was evaluated on a separate **20% test dataset**.

    The metrics below represent its performance on real unseen data.
    """)
    st.code('''model =  RandomForestClassifier(bootstrap=True,min_impurity_decrease=0.045568,
         max_features='log2',n_estimators=213,min_samples_split=9,min_weight_fraction_leaf=0.082159)''')
    # Metrics table
    st.markdown("### 📈 Evaluation Metrics")

    test_results = {
        "Metric": ["Accuracy", "Precision", "Recall", "F1 Score", "ROC-AUC Score"],
        "Score": [0.89, 0.88, 0.87, 0.88, 0.91]
    }

    metrics_df = pd.DataFrame(test_results)
    st.dataframe(metrics_df)

    # Confusion Matrix (static representation)


    # ROC Curve (sample)
    st.markdown("### 📉 ROC-AUC Curve")
    fpr = [0.0, 0.1, 0.2, 0.4, 1.0]
    tpr = [0.0, 0.6, 0.8, 0.9, 1.0]

    fig2 = go.Figure()
    fig2.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines+markers', name='ROC Curve', line=dict(color='green')))
    fig2.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='Random Baseline', line=dict(dash='dash')))
    fig2.update_layout(title="ROC-AUC Curve", xaxis_title="False Positive Rate", yaxis_title="True Positive Rate")
    st.plotly_chart(fig2, use_container_width=True)

    # Classification Report Table (optional)
    st.markdown("### 🧾 Classification Report (Summary)")
    report = pd.DataFrame({
        'Class': ['No Rain', 'Rain'],
        'Precision': [0.88, 0.87],
        'Recall': [0.90, 0.85],
        'F1 Score': [0.89, 0.86],
        'Support': [940, 560]
    })
    st.dataframe(report.style.format(precision=2))

   
    # Footer
    st.markdown("<hr style='border: 0.5px solid gray;'>", unsafe_allow_html=True)
    st.markdown("<p style='text-align: center; color: gray;'>Rain Prediction App • Final Model Testing Results</p>", unsafe_allow_html=True)


    if st.button("➡️ Go to Model_deployment"):
        st.session_state.page = "model_deployment"
    
    if st.button("➡️ Go to Pipeline"):
        navigate_to("main")

elif st.session_state.page == "model_deployment":
    st.write("This model is deployed on huggingface using streamlit library.")
    st.markdown('CLick below to see the working model👇 ')
    if st.button("Go to model"):
        st.switch_page("Model.py")

    if st.button("➡️ Go to Pipeline"):
        navigate_to("main")