Spaces:

Nikhillmahesh701
/

Loan_Recovery

Running

App Files Files Community

Loan_Recovery / app.py

Nikhillmahesh701

Create app.py

a47b6e9 verified about 2 months ago

raw

history blame contribute delete

15.9 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib
	matplotlib.use('Agg') # Use non-interactive backend
	import matplotlib.pyplot as plt
	import seaborn as sns
	import os
	import joblib
	from src.models.loan_recovery_model import LoanRecoveryModel
	from src.utils.data_generator import generate_loan_data
	from src.preprocessing.data_processor import LoanDataProcessor

	# Browser-tab title/icon and overall layout for the Streamlit page
	_page_settings = {
	    "page_title": "Smart Loan Recovery System",
	    "page_icon": "💰",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**_page_settings)

	# Define functions
	@st.cache_data
	def load_sample_data():
	    """Return the sample loan dataset, creating it on first use.

	    Reads ``data/loan_data.csv`` when that file exists. Otherwise 1000
	    samples are generated with ``generate_loan_data``, written to that
	    path (creating the ``data`` directory if needed) and returned.
	    """
	    csv_path = "data/loan_data.csv"

	    if not os.path.exists(csv_path):
	        # First run: build the dataset and persist it for later runs
	        generated = generate_loan_data(n_samples=1000)
	        os.makedirs("data", exist_ok=True)
	        generated.to_csv(csv_path, index=False)
	        return generated

	    return pd.read_csv(csv_path)

	@st.cache_resource
	def load_model(model_type="random_forest"):
	    """Return the saved loan-recovery model for ``model_type``.

	    The model file is looked up at ``models/loan_recovery_<model_type>.pkl``.
	    When it is missing, ``train_and_save_model`` is run first and the file
	    is then loaded through ``LoanRecoveryModel.load_model``.
	    """
	    artifact_path = f"models/loan_recovery_{model_type}.pkl"
	    needs_training = not os.path.exists(artifact_path)

	    if needs_training:
	        st.info(f"Model not found. Training a new {model_type} model...")
	        # Imported lazily so the training module is only loaded when required.
	        # NOTE(review): assumes train_and_save_model writes to artifact_path - confirm.
	        from src.train_model import train_and_save_model
	        train_and_save_model(model_type=model_type)

	    return LoanRecoveryModel.load_model(artifact_path)

	def predict_recovery(model, data):
	    """Return whatever ``model.predict`` yields for ``data``.

	    Callers in this file treat the result as per-row recovery probabilities.
	    """
	    return model.predict(data)

	def plot_recovery_distribution(data):
	    """Plot how many loans were not recovered versus recovered.

	    Args:
	        data: DataFrame with a ``recovery_status`` column.
	            NOTE(review): assumes 0 means not recovered and 1 means
	            recovered (boolean values compare equal to these) - confirm
	            against the data generator.

	    Returns:
	        The matplotlib Figure holding the bar chart. Each bar is annotated
	        with its count and, when non-empty, its share of all rows.
	    """
	    fig, ax = plt.subplots(figsize=(10, 6))
	    labels = ['Not Recovered', 'Recovered']

	    # Count each class explicitly. value_counts() orders by frequency, which
	    # would swap the bars under the fixed labels whenever recovered loans are
	    # the majority, and would drop a class that has no rows at all.
	    status = data['recovery_status']
	    recovery_counts = [int((status == 0).sum()), int((status == 1).sum())]

	    ax.bar(labels, recovery_counts)
	    ax.set_ylabel('Count')
	    ax.set_title('Distribution of Loan Recovery Status')

	    total = len(data)
	    for i, v in enumerate(recovery_counts):
	        ax.text(i, v + 5, str(v), ha='center')
	        # Percentage label sits inside the bar; skip it for an empty bar
	        # (this also avoids dividing by zero on an empty frame).
	        if v:
	            percentage = v / total * 100
	            ax.text(i, v/2, f"{percentage:.1f}%", ha='center', color='white', fontweight='bold')

	    return fig

	def plot_feature_importance(model):
	    """Return the model's own feature-importance plot for its top 10 features."""
	    importance_figure = model.plot_feature_importance(top_n=10)
	    return importance_figure

	def plot_recovery_by_feature(data, feature, is_categorical=False):
	    """Plot the recovery rate (mean of ``recovery_status``) per group of a feature.

	    Args:
	        data: DataFrame holding ``feature`` and a ``recovery_status`` column.
	            It is not modified.
	        feature: Name of the column to group by.
	        is_categorical: If True, draw one bar per distinct value, ordered by
	            ascending recovery rate. Otherwise the feature is treated as
	            numeric: a fixed set of features is grouped by raw value and
	            every other feature is split into up to five quantile bins.

	    Returns:
	        The matplotlib Figure holding the bar chart; every bar carries an
	        ``n=<count>`` label with the number of rows in its group.
	    """
	    # Numeric features that are grouped by raw value instead of quantile bins
	    raw_value_features = ('age', 'loan_term', 'previous_defaults', 'days_past_due')

	    fig, ax = plt.subplots(figsize=(10, 6))
	    chart_title = f'Recovery Rate by {feature.replace("_", " ").title()}'

	    if is_categorical:
	        grouped = data.groupby(feature)['recovery_status']
	        recovery_by_feature = grouped.mean().sort_values()
	        # Bars are ordered by rate, so put the counts in that same order;
	        # otherwise each n= label would be drawn on the wrong bar.
	        counts = grouped.size().reindex(recovery_by_feature.index)

	        ax.bar(recovery_by_feature.index, recovery_by_feature.values * 100)
	        ax.set_ylabel('Recovery Rate (%)')
	        ax.set_title(chart_title)
	        ax.set_ylim(0, 100)

	        for i, count in enumerate(counts.values):
	            ax.text(i, 5, f"n={count}", ha='center', color='white', fontweight='bold')

	        # Many categories: tilt the labels so they do not overlap
	        if len(recovery_by_feature) > 5:
	            plt.xticks(rotation=45, ha='right')
	    else:
	        use_raw_values = feature in raw_value_features
	        if use_raw_values:
	            bin_values = data[feature]
	        else:
	            bin_values = pd.qcut(data[feature], 5, duplicates='drop')

	        # assign() builds a new frame, so the caller's DataFrame does not
	        # pick up a stray 'feature_bin' column.
	        binned = data.assign(feature_bin=bin_values)
	        grouped = binned.groupby('feature_bin')['recovery_status']
	        recovery_by_bin = grouped.mean().sort_index()
	        counts = grouped.size().sort_index()

	        positions = range(len(recovery_by_bin))
	        ax.bar(positions, recovery_by_bin.values * 100)
	        ax.set_ylabel('Recovery Rate (%)')
	        ax.set_title(chart_title)
	        ax.set_ylim(0, 100)

	        ax.set_xticks(positions)
	        if use_raw_values:
	            ax.set_xticklabels(recovery_by_bin.index)
	        else:
	            # Interval bins are shown as "left-right"; anything else as str()
	            bin_labels = [
	                f"{bin_range.left:.1f}-{bin_range.right:.1f}"
	                if hasattr(bin_range, 'left') and hasattr(bin_range, 'right')
	                else str(bin_range)
	                for bin_range in recovery_by_bin.index
	            ]
	            ax.set_xticklabels(bin_labels)
	            plt.xticks(rotation=45, ha='right')

	        for i, count in enumerate(counts.values):
	            ax.text(i, 5, f"n={count}", ha='center', color='white', fontweight='bold')

	    ax.set_xlabel(feature.replace("_", " ").title())

	    plt.tight_layout()
	    return fig

	# Main application
	def _show_figure(fig):
	    """Render a matplotlib figure in the app, then close it.

	    Figures created through pyplot stay registered until closed, so a
	    long-running app would otherwise accumulate one per rerun.
	    """
	    st.pyplot(fig)
	    plt.close(fig)


	@st.cache_data
	def _df_to_csv_bytes(df):
	    """Serialize a DataFrame to UTF-8 encoded CSV bytes without the index."""
	    return df.to_csv(index=False).encode('utf-8')


	def _batch_monthly_payment(loans):
	    """Return the amortized monthly payment for every row of ``loans``.

	    Reads the ``loan_amount``, ``interest_rate`` (annual, in percent) and
	    ``loan_term`` (months) columns.
	    """
	    monthly_rate = loans['interest_rate'] / 100 / 12
	    growth = (1 + monthly_rate) ** loans['loan_term']
	    amortized = loans['loan_amount'] * monthly_rate * growth / (growth - 1)
	    # At 0% interest the formula above is 0/0 (NaN); the payment is then
	    # simply the principal spread evenly over the term.
	    interest_free = loans['loan_amount'] / loans['loan_term']
	    return amortized.where(monthly_rate != 0, interest_free)


	def _render_single_loan(model):
	    """Render the single-loan form and, on request, its prediction."""
	    st.subheader("Enter Loan Information")

	    col1, col2, col3 = st.columns(3)

	    with col1:
	        age = st.number_input("Age", min_value=18, max_value=100, value=35)
	        gender = st.selectbox("Gender", ["Male", "Female"])
	        employment_status = st.selectbox(
	            "Employment Status",
	            ["Employed", "Self-employed", "Unemployed", "Retired"]
	        )
	        annual_income = st.number_input("Annual Income ($)", min_value=0, value=60000)

	    with col2:
	        credit_score = st.slider("Credit Score", 300, 850, 650)
	        loan_amount = st.number_input("Loan Amount ($)", min_value=1000, value=20000)
	        interest_rate = st.slider("Interest Rate (%)", 1.0, 25.0, 8.0, 0.1)
	        loan_term = st.selectbox("Loan Term (months)", [12, 24, 36, 48, 60])

	    with col3:
	        payment_history = st.selectbox(
	            "Payment History",
	            ["Excellent", "Good", "Fair", "Poor", "Very Poor"]
	        )
	        days_past_due = st.number_input("Days Past Due", min_value=0, value=0)
	        previous_defaults = st.number_input("Previous Defaults", min_value=0, max_value=10, value=0)

	    # Derived features. The slider's minimum of 1.0% keeps the rate non-zero,
	    # so the amortization formula cannot divide by zero here.
	    monthly_rate = interest_rate / 100 / 12
	    growth = (1 + monthly_rate) ** loan_term
	    monthly_payment = loan_amount * monthly_rate * growth / (growth - 1)

	    # max(1, ...) guards against a zero income
	    debt_to_income = (monthly_payment * 12) / max(1, annual_income)

	    st.subheader("Calculated Values")
	    col1, col2 = st.columns(2)
	    with col1:
	        st.metric("Monthly Payment", f"${monthly_payment:.2f}")
	    with col2:
	        st.metric("Debt-to-Income Ratio", f"{debt_to_income*100:.2f}%")

	    # One-row frame with the columns handed to the model
	    input_data = pd.DataFrame({
	        'age': [age],
	        'gender': [gender],
	        'employment_status': [employment_status],
	        'annual_income': [annual_income],
	        'credit_score': [credit_score],
	        'loan_amount': [loan_amount],
	        'interest_rate': [interest_rate],
	        'loan_term': [loan_term],
	        'payment_history': [payment_history],
	        'days_past_due': [days_past_due],
	        'previous_defaults': [previous_defaults],
	        'monthly_payment': [monthly_payment],
	        'debt_to_income': [debt_to_income]
	    })

	    if not st.button("Predict Recovery Probability"):
	        return

	    with st.spinner("Calculating recovery probability..."):
	        recovery_prob = predict_recovery(model, input_data)[0]

	    st.subheader("Prediction Result")

	    # Horizontal gauge: grey track, coloured fill up to the probability
	    fig, ax = plt.subplots(figsize=(10, 2))
	    ax.barh([0], [100], color='lightgray', height=0.5)
	    ax.barh([0], [recovery_prob * 100], color='green' if recovery_prob >= 0.5 else 'red', height=0.5)
	    ax.set_xlim(0, 100)
	    ax.set_yticks([])
	    ax.set_xticks([0, 25, 50, 75, 100])
	    ax.set_xticklabels(['0%', '25%', '50%', '75%', '100%'])
	    ax.axvline(50, color='gray', linestyle='--', alpha=0.5)
	    ax.text(recovery_prob * 100, 0, f"{recovery_prob*100:.1f}%",
	            ha='center', va='center', fontweight='bold', color='black')
	    _show_figure(fig)

	    st.subheader("Recovery Assessment")
	    if recovery_prob >= 0.8:
	        st.success("High probability of recovery. Standard collection procedures recommended.")
	    elif recovery_prob >= 0.5:
	        st.info("Moderate probability of recovery. Consider offering a payment plan.")
	    elif recovery_prob >= 0.3:
	        st.warning("Low probability of recovery. Consider debt restructuring or settlement offers.")
	    else:
	        st.error("Very low probability of recovery. Consider debt write-off or third-party collection.")

	    # Rule-based flags derived from the form inputs (independent of the model)
	    st.subheader("Key Risk Factors")
	    risk_checks = [
	        (credit_score < 600, "Low credit score"),
	        (days_past_due > 30, "Significant payment delay"),
	        (previous_defaults > 0, "History of defaults"),
	        (debt_to_income > 0.4, "High debt-to-income ratio"),
	        (payment_history in ["Poor", "Very Poor"], "Poor payment history"),
	    ]
	    risk_factors = [label for triggered, label in risk_checks if triggered]

	    if risk_factors:
	        for factor in risk_factors:
	            st.write(f"• {factor}")
	    else:
	        st.write("No significant risk factors identified.")


	def _render_batch_prediction(model, data):
	    """Render the CSV upload flow and, on request, the batch predictions.

	    ``data`` is the sample dataset; a few of its rows are offered as a
	    downloadable example file.
	    """
	    st.subheader("Upload CSV File")
	    st.write("""
	    Upload a CSV file with loan information. The file should contain the following columns:
	    age, gender, employment_status, annual_income, credit_score, loan_amount, interest_rate,
	    loan_term, payment_history, days_past_due, previous_defaults
	    """)

	    # Sample file download; cap the sample size so a tiny dataset cannot raise
	    sample_data = data.sample(min(5, len(data))).drop(
	        ['customer_id', 'recovery_status'], axis=1, errors='ignore'
	    )
	    st.download_button(
	        "Download Sample CSV",
	        _df_to_csv_bytes(sample_data),
	        "sample_loans.csv",
	        "text/csv",
	        key='download-csv'
	    )

	    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
	    if uploaded_file is None:
	        return

	    # The upload is untrusted input: report unreadable files instead of crashing
	    try:
	        batch_data = pd.read_csv(uploaded_file)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	        st.error(f"Could not read the uploaded file as CSV: {exc}")
	        return

	    st.write("Preview of uploaded data:")
	    st.dataframe(batch_data.head())

	    required_cols = ['age', 'gender', 'employment_status', 'annual_income',
	                     'credit_score', 'loan_amount', 'interest_rate',
	                     'loan_term', 'payment_history', 'days_past_due',
	                     'previous_defaults']
	    missing_cols = [col for col in required_cols if col not in batch_data.columns]
	    if missing_cols:
	        st.error(f"Missing required columns: {', '.join(missing_cols)}")
	        return

	    # A header-only file would otherwise divide by zero in the summary below
	    if batch_data.empty:
	        st.warning("The uploaded file contains no loan records.")
	        return

	    # Calculate derived features if not present
	    if 'monthly_payment' not in batch_data.columns:
	        batch_data['monthly_payment'] = _batch_monthly_payment(batch_data)

	    if 'debt_to_income' not in batch_data.columns:
	        # replace(0, 1) guards against a zero income
	        batch_data['debt_to_income'] = (batch_data['monthly_payment'] * 12) / batch_data['annual_income'].replace(0, 1)

	    if not st.button("Run Batch Prediction"):
	        return

	    with st.spinner("Processing batch predictions..."):
	        recovery_probs = predict_recovery(model, batch_data)
	        batch_data['recovery_probability'] = recovery_probs
	        # 0.5 is the decision threshold between "recovered" and "not recovered"
	        batch_data['recovery_prediction'] = (recovery_probs >= 0.5).astype(int)

	    st.subheader("Prediction Results")
	    st.dataframe(batch_data)

	    st.subheader("Summary")
	    avg_prob = batch_data['recovery_probability'].mean() * 100
	    predicted_recoveries = batch_data['recovery_prediction'].sum()
	    recovery_rate = predicted_recoveries / len(batch_data) * 100

	    col1, col2 = st.columns(2)
	    with col1:
	        st.metric("Average Recovery Probability", f"{avg_prob:.2f}%")
	    with col2:
	        st.metric("Predicted Recovery Rate", f"{recovery_rate:.2f}% ({predicted_recoveries}/{len(batch_data)})")

	    st.subheader("Distribution of Recovery Probabilities")
	    fig, ax = plt.subplots(figsize=(10, 6))
	    sns.histplot(batch_data['recovery_probability'], bins=20, kde=True, ax=ax)
	    ax.set_xlabel("Recovery Probability")
	    ax.set_ylabel("Count")
	    ax.axvline(0.5, color='red', linestyle='--')
	    ax.text(0.5, ax.get_ylim()[1]*0.9, "Decision Threshold",
	            rotation=90, va='top', ha='right', color='red')
	    _show_figure(fig)

	    st.download_button(
	        "Download Results CSV",
	        _df_to_csv_bytes(batch_data),
	        "loan_recovery_predictions.csv",
	        "text/csv",
	        key='download-results'
	    )


	def main():
	    """Render the loan-recovery prediction page.

	    Loads the sample data and the random-forest model, then shows either
	    the single-loan form or the batch CSV flow depending on the selected
	    prediction type.
	    """
	    st.title("Smart Loan Recovery System")
	    st.image("https://img.icons8.com/color/96/000000/loan.png", width=100)

	    # Load data and model (random forest only)
	    data = load_sample_data()
	    model = load_model("random_forest")

	    st.title("Predict Loan Recovery")

	    st.write("""
	    Use this tool to predict the probability of recovering a loan based on customer and loan information.
	    You can either:
	    1. Enter information for a single loan
	    2. Upload a CSV file with multiple loans
	    """)

	    prediction_type = st.radio("Prediction Type", ["Single Loan", "Batch Prediction"])

	    if prediction_type == "Single Loan":
	        _render_single_loan(model)
	    else:
	        _render_batch_prediction(model, data)



	# Run the app only when this file is executed as the main script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()