"""Gradio demo: fraud detection on a synthetic banking-transaction dataset.

The script generates 1000 random transactions, labels them with three
hand-written fraud rules, trains a random forest on the result and exposes
four tabs: a natural-language query box, a manual input form, a few
matplotlib visualizations and the raw data table.
"""
import re

import gradio as gr
import matplotlib

# Plots are rendered inside the web server, never in a GUI window, so select
# the non-interactive backend before pyplot is imported.
matplotlib.use("Agg")

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from fuzzywuzzy import process
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Column order the model is trained on; prediction inputs must match it.
FEATURE_COLUMNS = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']

# Patterns used by the natural-language parser.
_NUMBER = r'\d+(?:,\d{3})*(?:\.\d+)?'
_DOLLAR_AMOUNT_RE = re.compile(r'\$\s*(' + _NUMBER + r')')
_NUMBER_RE = re.compile(_NUMBER)
# Accepts "26 years", "26 year old" and the hyphenated "26-year-old".
_AGE_RE = re.compile(r'(\d+)[\s-]*years?', re.IGNORECASE)


# Data generation and preprocessing
def load_data():
    """Build the synthetic transaction table and its rule-based fraud labels.

    Returns:
        A DataFrame with 1000 rows and the columns TransactionID, Amount,
        Type, City, Age, Income and Fraud (0 or 1).
    """
    np.random.seed(42)
    cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
    incomes = ['Low', 'Medium', 'High']

    data = pd.DataFrame({
        'TransactionID': range(1, 1001),
        'Amount': np.random.uniform(10, 15000, 1000).round(2),
        'Type': np.random.choice(['Credit', 'Debit'], 1000),
        'City': np.random.choice(cities, 1000),
        'Age': np.random.randint(18, 70, 1000),
        'Income': np.random.choice(incomes, 1000, p=[0.4, 0.4, 0.2]),
    })

    # Fraud patterns: a row is fraudulent when any of the three rules holds.
    data['Fraud'] = 0
    data.loc[
        ((data['Amount'] > 5000) & (data['Income'] == 'Low')) |
        ((data['Type'] == 'Credit') & (data['Amount'] > 8000)) |
        ((data['City'] == 'New York') & (data['Age'].between(20, 35)) & (data['Amount'] > 6000)),
        'Fraud'
    ] = 1

    return data


data = load_data()

# Initialize encoders
le_type = LabelEncoder()
le_city = LabelEncoder()
le_income = LabelEncoder()

data['Type_encoded'] = le_type.fit_transform(data['Type'])
data['City_encoded'] = le_city.fit_transform(data['City'])
data['Income_encoded'] = le_income.fit_transform(data['Income'])

# Plain-Python views of the fitted categories, used for membership tests.
KNOWN_TYPES = le_type.classes_.tolist()
KNOWN_CITIES = le_city.classes_.tolist()
KNOWN_INCOMES = le_income.classes_.tolist()

# Train model
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(data[FEATURE_COLUMNS], data['Fraud'])


# Fraud prediction function
def predict_fraud(amount, trans_type, city, age, income):
    """Classify one transaction and describe which fraud rules it triggers.

    Args:
        amount: Transaction amount.
        trans_type: One of the transaction types the encoder was fitted on.
        city: City name; a city the encoder has not seen is encoded as -1.
        age: Customer age.
        income: Income level; an unknown level is treated as 'Medium'.

    Returns:
        A three-line report (prediction, confidence in that prediction, risk
        factors), or a string starting with "Error: " if anything fails.
    """
    try:
        if amount is None or age is None:
            raise ValueError("Amount and age are required")
        if trans_type not in KNOWN_TYPES:
            raise ValueError(
                f"Transaction type must be one of: {', '.join(KNOWN_TYPES)}"
            )

        if city in KNOWN_CITIES:
            city_code = le_city.transform([city])[0]
        else:
            city = 'Unknown'
            city_code = -1
        if income not in KNOWN_INCOMES:
            income = 'Medium'

        input_df = pd.DataFrame(
            [{
                'Amount': amount,
                'Type_encoded': le_type.transform([trans_type])[0],
                'City_encoded': city_code,
                'Age': age,
                'Income_encoded': le_income.transform([income])[0],
            }],
            columns=FEATURE_COLUMNS,
        )

        fraud_column = list(model.classes_).index(1)
        fraud_proba = model.predict_proba(input_df)[0][fraud_column]
        prediction = model.predict(input_df)[0]
        # Report the probability of the class actually predicted; showing the
        # fraud probability next to a "Legitimate" verdict reads as low
        # confidence when it means the opposite.
        confidence = fraud_proba if prediction else 1 - fraud_proba

        explanation = []
        if amount > 5000 and income == 'Low':
            explanation.append("High amount for low income")
        if amount > 8000 and trans_type == 'Credit':
            explanation.append("Unusually large credit transaction")
        if city == 'New York' and 20 <= age <= 35 and amount > 6000:
            explanation.append("Suspicious pattern for young adults in NYC")

        return (
            f"Prediction: {'Fraudulent' if prediction else 'Legitimate'}\n"
            f"Confidence: {confidence*100:.1f}%\n"
            f"Risk Factors: {', '.join(explanation) if explanation else 'No specific risks'}"
        )
    except Exception as e:
        return f"Error: {str(e)}"


def _extract_amount(query, age_match):
    """Return the transaction amount mentioned in *query* as a float.

    A number written with a dollar sign wins. Otherwise the first number that
    is not the one already recognized as the age is used.
    """
    dollar_match = _DOLLAR_AMOUNT_RE.search(query)
    if dollar_match:
        return float(dollar_match.group(1).replace(',', ''))

    for number_match in _NUMBER_RE.finditer(query):
        if age_match is not None:
            age_start, age_end = age_match.span(1)
            if number_match.start() < age_end and age_start < number_match.end():
                continue
        return float(number_match.group(0).replace(',', ''))

    raise ValueError("No transaction amount found in query")


def _extract_city(query):
    """Return the known city named in *query*, fuzzy-matching as a fallback."""
    lowered = query.lower()
    for name in KNOWN_CITIES:
        if name.lower() in lowered:
            return name
    return process.extractOne(query, KNOWN_CITIES)[0]


# NLP processing function
def process_nl_query(query):
    """Parse a free-text transaction description and run the fraud model on it.

    Amount, type, city, age and income level are pulled out of the text. A
    missing age falls back to the dataset median, a missing type to 'Debit'
    and a missing income level to 'Medium'.

    Returns:
        The report produced by predict_fraud, or a string starting with
        "Error processing query: " if the text cannot be parsed.
    """
    try:
        if not query or not query.strip():
            raise ValueError("Please describe a transaction")

        lowered = query.lower()
        age_match = _AGE_RE.search(query)
        age = int(age_match.group(1)) if age_match else data['Age'].median()
        amount = _extract_amount(query, age_match)
        trans_type = 'Credit' if 'credit' in lowered else 'Debit'
        city = _extract_city(query)

        # Word boundaries keep "low" from matching inside other words.
        if re.search(r'\blow\b', lowered):
            income = 'Low'
        elif re.search(r'\bhigh\b', lowered):
            income = 'High'
        else:
            income = 'Medium'

        return predict_fraud(amount, trans_type, city, age, income)
    except Exception as e:
        return f"Error processing query: {str(e)}"


def _show_no_data(ax):
    """Write the 'no data' placeholder in the middle of *ax*."""
    ax.text(0.5, 0.5, 'No fraud data available', ha='center', va='center')


# Visualization function
def create_plot(choice):
    """Draw the visualization selected in the Data Insights tab.

    Args:
        choice: "Fraud by City", "Fraud by Income" or "Transaction Patterns".
            Any other value yields empty axes.

    Returns:
        A (figure, "") pair on success, or (None, error message) on failure.
    """
    fig = None
    try:
        fig, ax = plt.subplots(figsize=(10, 6))
        fraud_rows = data[data['Fraud'] == 1]

        if choice == "Fraud by City":
            city_counts = fraud_rows['City'].value_counts()
            ax.set_title('Fraud Cases by City')
            if not city_counts.empty:
                ax.bar(city_counts.index, city_counts.values)
                ax.set_xlabel('City')
                ax.set_ylabel('Count')
                ax.tick_params(axis='x', rotation=45)
            else:
                _show_no_data(ax)

        elif choice == "Fraud by Income":
            income_counts = fraud_rows['Income'].value_counts()
            ax.set_title('Fraud Cases by Income Level')
            if not income_counts.empty:
                ax.bar(income_counts.index, income_counts.values)
                ax.set_xlabel('Income Level')
                ax.set_ylabel('Count')
            else:
                _show_no_data(ax)

        elif choice == "Transaction Patterns":
            legit_rows = data[data['Fraud'] == 0]
            ax.set_title('Transaction Amount vs Age')
            if not fraud_rows.empty:
                ax.scatter(legit_rows['Amount'], legit_rows['Age'],
                           alpha=0.3, label='Legitimate')
                ax.scatter(fraud_rows['Amount'], fraud_rows['Age'],
                           color='red', alpha=0.5, label='Fraud')
                ax.set_xlabel('Amount')
                ax.set_ylabel('Age')
                ax.legend()
            else:
                _show_no_data(ax)

        fig.tight_layout()
        return fig, ""
    except Exception as e:
        return None, f"Error generating plot: {str(e)}"
    finally:
        # pyplot keeps a reference to every figure it creates; release this
        # one so repeated requests do not accumulate open figures.
        if fig is not None:
            plt.close(fig)


# Gradio Interface
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🕵️ Banking Fraud Detection System")

    with gr.Tab("💬 NLP Query"):
        gr.Markdown("## Analyze Transactions with Natural Language")
        with gr.Row():
            nl_input = gr.Textbox(
                label="Describe transaction",
                placeholder="e.g., 'Credit of $6000 in New York for a 26-year-old with low income'",
            )
        nl_btn = gr.Button("Analyze", variant="primary")
        nl_output = gr.Textbox(label="Analysis Result", lines=4)
        gr.Examples(
            examples=[
                "$8000 credit in Chicago for 45-year-old with medium income",
                "Verify $300 debit in Phoenix for 60-year-old high income client",
            ],
            inputs=nl_input,
        )
        nl_btn.click(process_nl_query, nl_input, nl_output)

    with gr.Tab("📝 Manual Input"):
        gr.Markdown("## Manual Transaction Analysis")
        with gr.Row():
            amount = gr.Number(label="Amount", minimum=0)
            trans_type = gr.Dropdown(["Credit", "Debit"], label="Type")
        with gr.Row():
            city = gr.Dropdown(le_city.classes_.tolist(), label="City")
            age = gr.Number(label="Age", minimum=18)
            income = gr.Dropdown(le_income.classes_.tolist(), label="Income Level")
        manual_btn = gr.Button("Analyze", variant="primary")
        manual_output = gr.Textbox(label="Analysis Result", lines=4)
        manual_btn.click(predict_fraud, [amount, trans_type, city, age, income], manual_output)

    with gr.Tab("📊 Data Insights"):
        gr.Markdown("## Fraud Pattern Visualization")
        with gr.Row():
            plot_choice = gr.Radio(
                ["Fraud by City", "Fraud by Income", "Transaction Patterns"],
                label="Select Visualization",
                value="Fraud by City",
            )
        with gr.Row():
            plot_output = gr.Plot()
            error_output = gr.Textbox(label="Error Message", visible=False)
        plot_choice.change(
            fn=create_plot,
            inputs=plot_choice,
            outputs=[plot_output, error_output],
        )
        # The change event does not fire for the preselected value, so draw
        # the default chart when the page loads.
        demo.load(
            fn=create_plot,
            inputs=plot_choice,
            outputs=[plot_output, error_output],
        )

    with gr.Tab("📁 Raw Data"):
        gr.Markdown("## Complete Transaction Dataset")
        gr.DataFrame(data)

if __name__ == "__main__":
    demo.launch()