# Spaces: Paused
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
import re | |
import matplotlib.pyplot as plt | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.preprocessing import LabelEncoder | |
from fuzzywuzzy import process | |
# Data generation and preprocessing
def load_data(n_rows: int = 1000, seed: int = 42) -> pd.DataFrame:
    """Build a synthetic transaction table with a rule-based ``Fraud`` label.

    Args:
        n_rows: Number of transactions to generate.
        seed: Seed for the random generator, so the table is reproducible.

    Returns:
        A DataFrame with columns ``TransactionID``, ``Amount``, ``Type``,
        ``City``, ``Age``, ``Income`` and ``Fraud`` (0 or 1).
    """
    # A private RandomState produces the same stream as seeding the global
    # numpy generator, without altering random state for the rest of the
    # process.
    rng = np.random.RandomState(seed)
    cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
    incomes = ['Low', 'Medium', 'High']

    # The draw order below determines the generated values; keep it stable.
    amounts = rng.uniform(10, 15000, n_rows).round(2)
    types = rng.choice(['Credit', 'Debit'], n_rows)
    city_col = rng.choice(cities, n_rows)
    ages = rng.randint(18, 70, n_rows)
    income_col = rng.choice(incomes, n_rows, p=[0.4, 0.4, 0.2])

    data = pd.DataFrame({
        'TransactionID': range(1, n_rows + 1),
        'Amount': amounts,
        'Type': types,
        'City': city_col,
        'Age': ages,
        'Income': income_col,
    })

    # Fraud patterns: a row is labelled fraudulent if any rule matches.
    high_amount_low_income = (data['Amount'] > 5000) & (data['Income'] == 'Low')
    large_credit = (data['Type'] == 'Credit') & (data['Amount'] > 8000)
    young_new_york = (
        (data['City'] == 'New York')
        & data['Age'].between(20, 35)
        & (data['Amount'] > 6000)
    )
    data['Fraud'] = 0
    data.loc[high_amount_low_income | large_credit | young_new_york, 'Fraud'] = 1
    return data
# Module-level dataset shared by the model, the plots and the raw-data tab.
data = load_data()

# Initialize encoders: one LabelEncoder per categorical column, kept at module
# level so the same fitted mapping is reused at prediction time.
le_type = LabelEncoder()
le_city = LabelEncoder()
le_income = LabelEncoder()
data['Type_encoded'] = le_type.fit_transform(data['Type'])
data['City_encoded'] = le_city.fit_transform(data['City'])
data['Income_encoded'] = le_income.fit_transform(data['Income'])

# Column order the classifier is trained on.
FEATURE_COLUMNS = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']

# Train model
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(data[FEATURE_COLUMNS], data['Fraud'])
# Fraud prediction function
def _encode_or_unknown(encoder, value):
    """Return the encoder's integer code for *value*, or -1 if it was not seen during fit."""
    if value in encoder.classes_:
        return encoder.transform([value])[0]
    return -1


def predict_fraud(amount, trans_type, city, age, income):
    """Classify one transaction and describe which fraud rules it triggers.

    Args:
        amount: Transaction amount.
        trans_type: Transaction type label; must be one the type encoder was
            fitted on, otherwise an error string is returned.
        city: City name; values unknown to the city encoder are treated as
            'Unknown' and encoded as -1.
        age: Customer age.
        income: Income level; values unknown to the income encoder fall back
            to 'Medium'.

    Returns:
        A multi-line string with the prediction, the model's confidence in
        that prediction, and the matching risk factors, or a string starting
        with "Error:" if anything fails.
    """
    # Fail early with a readable message instead of a numeric-conversion error.
    if amount is None or age is None:
        return "Error: amount and age are required"
    try:
        if city not in le_city.classes_:
            city = 'Unknown'
        if income not in le_income.classes_:
            income = 'Medium'

        # Column order must match the order used when fitting the model.
        input_df = pd.DataFrame({
            'Amount': [amount],
            'Type_encoded': [le_type.transform([trans_type])[0]],
            'City_encoded': [_encode_or_unknown(le_city, city)],
            'Age': [age],
            'Income_encoded': [_encode_or_unknown(le_income, income)],
        })
        fraud_proba = model.predict_proba(input_df)[0][1]
        prediction = model.predict(input_df)[0]
        # Report the probability of the class actually predicted, so a
        # "Legitimate" verdict is not paired with the (low) fraud probability.
        confidence = fraud_proba if prediction else 1 - fraud_proba

        explanation = []
        if amount > 5000 and income == 'Low':
            explanation.append("High amount for low income")
        if amount > 8000 and trans_type == 'Credit':
            explanation.append("Unusually large credit transaction")
        if city == 'New York' and 20 <= age <= 35 and amount > 6000:
            explanation.append("Suspicious pattern for young adults in NYC")

        return (
            f"Prediction: {'Fraudulent' if prediction else 'Legitimate'}\n"
            f"Confidence: {confidence*100:.1f}%\n"
            f"Risk Factors: {', '.join(explanation) if explanation else 'No specific risks'}"
        )
    except Exception as e:
        # UI boundary: surface the failure as text rather than raising.
        return f"Error: {str(e)}"
# NLP processing function
def process_nl_query(query):
    """Extract transaction fields from free text and run the fraud prediction.

    The amount is taken from a ``$``-prefixed number when present, otherwise
    from the first number that is not the age. Age is read from phrases such
    as "26-year-old" or "26 years"; if absent, the dataset's median age is
    used. The transaction type is 'Credit' when the text mentions "credit",
    otherwise 'Debit'. Income is 'Low' or 'High' when that word appears,
    otherwise 'Medium'. The city is the closest fuzzy match among the cities
    known to the city encoder.

    Args:
        query: Natural-language description of a transaction.

    Returns:
        The string produced by ``predict_fraud``, or a string starting with
        "Error processing query:" if the text cannot be parsed.
    """
    try:
        if not query or not query.strip():
            return "Error processing query: please describe a transaction"

        number = r'(\d+(?:,\d{3})*(?:\.\d{2})?)'
        # Allow hyphens as well as spaces so "26-year-old" is recognised.
        age_match = re.search(r'(\d+)[\s-]*years?', query, re.IGNORECASE)

        amount_match = re.search(r'\$\s*' + number, query)
        if amount_match is None:
            # No currency marker: take the first number that is not the age.
            for candidate in re.finditer(number, query):
                is_age = (
                    age_match is not None
                    and candidate.start() <= age_match.start(1) < candidate.end()
                )
                if not is_age:
                    amount_match = candidate
                    break
        if amount_match is None:
            return "Error processing query: no transaction amount found"
        amount = float(amount_match.group(1).replace(',', ''))

        text = query.lower()
        trans_type = 'Credit' if 'credit' in text else 'Debit'
        city = process.extractOne(query, le_city.classes_)[0]
        age = int(age_match.group(1)) if age_match else int(data['Age'].median())

        # Whole-word matches only, so words like "below" do not imply low income.
        if re.search(r'\blow\b', text):
            income = 'Low'
        elif re.search(r'\bhigh\b', text):
            income = 'High'
        else:
            income = 'Medium'

        return predict_fraud(amount, trans_type, city, age, income)
    except Exception as e:
        # UI boundary: surface the failure as text rather than raising.
        return f"Error processing query: {str(e)}"
# Visualization function
def _show_no_data(ax, title):
    """Draw the placeholder shown when there are no fraud rows to plot."""
    ax.text(0.5, 0.5, 'No fraud data available', ha='center', va='center')
    ax.set_title(title)


def _plot_fraud_counts(ax, counts, title, xlabel, rotate_labels=False):
    """Draw a bar chart of fraud counts per category, or the no-data placeholder."""
    if counts.empty:
        _show_no_data(ax, title)
        return
    ax.bar(counts.index, counts.values)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    if rotate_labels:
        ax.tick_params(axis='x', labelrotation=45)


def create_plot(choice):
    """Render the visualization selected in the UI.

    Args:
        choice: One of "Fraud by City", "Fraud by Income" or
            "Transaction Patterns". Any other value yields empty axes.

    Returns:
        A ``(figure, message)`` tuple: the matplotlib figure and an empty
        string on success, or ``(None, error_text)`` on failure.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(10, 6))
        fraud_data = data[data['Fraud'] == 1]

        if choice == "Fraud by City":
            _plot_fraud_counts(
                ax, fraud_data['City'].value_counts(),
                'Fraud Cases by City', 'City', rotate_labels=True,
            )
        elif choice == "Fraud by Income":
            _plot_fraud_counts(
                ax, fraud_data['Income'].value_counts(),
                'Fraud Cases by Income Level', 'Income Level',
            )
        elif choice == "Transaction Patterns":
            if fraud_data.empty:
                _show_no_data(ax, 'Transaction Amount vs Age')
            else:
                legit_data = data[data['Fraud'] == 0]
                ax.scatter(legit_data['Amount'], legit_data['Age'],
                           alpha=0.3, label='Legitimate')
                ax.scatter(fraud_data['Amount'], fraud_data['Age'],
                           color='red', alpha=0.5, label='Fraud')
                ax.set_title('Transaction Amount vs Age')
                ax.set_xlabel('Amount')
                ax.set_ylabel('Age')
                ax.legend()

        fig.tight_layout()
        return fig, ""
    except Exception as e:
        return None, f"Error generating plot: {str(e)}"
    finally:
        # Deregister this figure from pyplot so repeated calls do not
        # accumulate open figures; the returned Figure object stays usable.
        if fig is not None:
            plt.close(fig)
# Gradio Interface
# Four tabs: natural-language analysis, manual input, fraud visualizations and
# the raw dataset.
# NOTE(review): the emoji in the heading and tab labels were reconstructed from
# mis-encoded text. The heading and "NLP Query" icons are unambiguous; the
# icons for the last three tabs are best guesses -- confirm.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🕵️ Banking Fraud Detection System")

    with gr.Tab("💬 NLP Query"):
        gr.Markdown("## Analyze Transactions with Natural Language")
        with gr.Row():
            nl_input = gr.Textbox(label="Describe transaction", placeholder="e.g., 'Credit of $6000 in New York for a 26-year-old with low income'")
        nl_btn = gr.Button("Analyze", variant="primary")
        nl_output = gr.Textbox(label="Analysis Result", lines=4)
        gr.Examples(
            examples=[
                "$8000 credit in Chicago for 45-year-old with medium income",
                "Verify $300 debit in Phoenix for 60-year-old high income client"
            ],
            inputs=nl_input
        )
        nl_btn.click(process_nl_query, nl_input, nl_output)

    with gr.Tab("📝 Manual Input"):
        gr.Markdown("## Manual Transaction Analysis")
        with gr.Row():
            amount = gr.Number(label="Amount", minimum=0)
            trans_type = gr.Dropdown(["Credit", "Debit"], label="Type")
        with gr.Row():
            city = gr.Dropdown(le_city.classes_.tolist(), label="City")
            age = gr.Number(label="Age", minimum=18)
            income = gr.Dropdown(le_income.classes_.tolist(), label="Income Level")
        manual_btn = gr.Button("Analyze", variant="primary")
        manual_output = gr.Textbox(label="Analysis Result", lines=4)
        manual_btn.click(predict_fraud, [amount, trans_type, city, age, income], manual_output)

    with gr.Tab("📊 Data Insights"):
        gr.Markdown("## Fraud Pattern Visualization")
        with gr.Row():
            plot_choice = gr.Radio(
                ["Fraud by City", "Fraud by Income", "Transaction Patterns"],
                label="Select Visualization",
                value="Fraud by City"
            )
        with gr.Row():
            plot_output = gr.Plot()
            error_output = gr.Textbox(label="Error Message", visible=False)
        plot_choice.change(
            fn=create_plot,
            inputs=plot_choice,
            outputs=[plot_output, error_output]
        )
        # The change event only fires on user interaction, so also draw the
        # default selection when the page loads.
        demo.load(
            fn=create_plot,
            inputs=plot_choice,
            outputs=[plot_output, error_output]
        )

    with gr.Tab("📁 Raw Data"):
        gr.Markdown("## Complete Transaction Dataset")
        gr.DataFrame(data)

demo.launch()