import gradio as gr
import pandas as pd
import numpy as np
import re
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from fuzzywuzzy import process

# Data generation and preprocessing
def load_data(n_rows=1000, seed=42):
    """Generate a synthetic, labelled banking-transaction dataset.

    Args:
        n_rows: Number of transactions to generate. Defaults to 1000.
        seed: Value passed to ``np.random.seed`` before sampling, so the same
            arguments always yield the same table. Note that this reseeds
            NumPy's global random state. Defaults to 42.

    Returns:
        A DataFrame with one row per transaction and the columns
        ``TransactionID`` (1..n_rows), ``Amount`` (uniform in [10, 15000),
        rounded to 2 decimals), ``Type``, ``City``, ``Age`` (integers in
        [18, 70)), ``Income`` and the rule-derived 0/1 label ``Fraud``.

    Raises:
        ValueError: If ``n_rows`` is negative.
    """
    if n_rows < 0:
        raise ValueError(f"n_rows must be non-negative, got {n_rows}")

    np.random.seed(seed)
    cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
    incomes = ['Low', 'Medium', 'High']

    # The sampling calls stay in this exact order so that the default
    # arguments keep producing the same table as before.
    data = pd.DataFrame({
        'TransactionID': range(1, n_rows + 1),
        'Amount': np.random.uniform(10, 15000, n_rows).round(2),
        'Type': np.random.choice(['Credit', 'Debit'], n_rows),
        'City': np.random.choice(cities, n_rows),
        'Age': np.random.randint(18, 70, n_rows),
        'Income': np.random.choice(incomes, n_rows, p=[0.4, 0.4, 0.2]),
    })

    # Fraud patterns: a row is labelled fraudulent if any rule matches.
    low_income_large_amount = (data['Amount'] > 5000) & (data['Income'] == 'Low')
    very_large_credit = (data['Type'] == 'Credit') & (data['Amount'] > 8000)
    young_new_yorker = (
        (data['City'] == 'New York')
        & data['Age'].between(20, 35)
        & (data['Amount'] > 6000)
    )

    data['Fraud'] = 0
    data.loc[
        low_income_large_amount | very_large_credit | young_new_yorker,
        'Fraud'
    ] = 1

    return data

# Synthetic dataset shared by the model, the plots and the raw-data tab
data = load_data()

# Initialize encoders: one per categorical column, kept at module level
# because the prediction and UI code read their fitted classes
le_type, le_city, le_income = LabelEncoder(), LabelEncoder(), LabelEncoder()

# Fit each encoder on its column and store the integer codes alongside it
for _encoded_name, _source_name, _encoder in (
    ('Type_encoded', 'Type', le_type),
    ('City_encoded', 'City', le_city),
    ('Income_encoded', 'Income', le_income),
):
    data[_encoded_name] = _encoder.fit_transform(data[_source_name])

# Train model on the numeric and encoded columns, in this column order
_feature_columns = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    data[_feature_columns], data['Fraud']
)

# Fraud prediction function
def predict_fraud(amount, trans_type, city, age, income):
    """Score a single transaction with the trained fraud model.

    Args:
        amount: Transaction amount.
        trans_type: Transaction type; must be a label known to ``le_type``.
        city: City name. Labels unknown to ``le_city`` are reported as
            'Unknown' and encoded as -1.
        age: Customer age.
        income: Income level. Labels unknown to ``le_income`` are treated
            as 'Medium'.

    Returns:
        A three-line string with the predicted class, the model's
        probability for that predicted class, and any rule-based risk
        factors that apply. On failure, a string starting with ``"Error: "``.
    """
    try:
        # Empty UI fields arrive as None; fail with a readable message
        # instead of whatever exception the model or comparisons would raise.
        if amount is None or age is None:
            return "Error: amount and age are required"
        if trans_type not in le_type.classes_.tolist():
            return f"Error: unknown transaction type {trans_type!r}"

        if city in le_city.classes_.tolist():
            city_code = le_city.transform([city])[0]
        else:
            # The model never saw this city; use an out-of-range code.
            city, city_code = 'Unknown', -1
        if income not in le_income.classes_.tolist():
            income = 'Medium'

        # Column names and order must match the frame the model was fitted on.
        input_df = pd.DataFrame({
            'Amount': [amount],
            'Type_encoded': [le_type.transform([trans_type])[0]],
            'City_encoded': [city_code],
            'Age': [age],
            'Income_encoded': [le_income.transform([income])[0]],
        })

        # One forest pass: the predicted class is the arg-max of the class
        # probabilities, and the confidence is the probability of *that*
        # class (not always the fraud class, which would read as
        # "Legitimate, 2% confident").
        class_probabilities = model.predict_proba(input_df)[0]
        top_index = int(np.argmax(class_probabilities))
        prediction = model.classes_[top_index]
        confidence = class_probabilities[top_index]

        # Human-readable reasons mirroring the rules used to label the data.
        explanation = []
        if amount > 5000 and income == 'Low':
            explanation.append("High amount for low income")
        if amount > 8000 and trans_type == 'Credit':
            explanation.append("Unusually large credit transaction")
        if city == 'New York' and 20 <= age <= 35 and amount > 6000:
            explanation.append("Suspicious pattern for young adults in NYC")

        return (
            f"Prediction: {'Fraudulent' if prediction else 'Legitimate'}\n"
            f"Confidence: {confidence*100:.1f}%\n"
            f"Risk Factors: {', '.join(explanation) if explanation else 'No specific risks'}"
        )
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"Error: {str(e)}"

# NLP processing function
# Number explicitly marked as money, e.g. "$6,000.50".
_DOLLAR_AMOUNT_RE = re.compile(r'\$\s*(\d+(?:,\d{3})*(?:\.\d+)?)')
# Any number; used only when the query has no "$" amount.
_PLAIN_NUMBER_RE = re.compile(r'(\d+(?:,\d{3})*(?:\.\d+)?)')
# Matches "26 years", "26 year old" and the hyphenated "26-year-old".
_AGE_RE = re.compile(r'(\d+)[\s-]*years?\b', re.IGNORECASE)
# Whole-word matches so that "follow"/"below"/"highway" are not income hints.
_LOW_INCOME_RE = re.compile(r'\blow\b', re.IGNORECASE)
_HIGH_INCOME_RE = re.compile(r'\bhigh\b', re.IGNORECASE)


def _extract_amount(query, age_match):
    """Return the transaction amount in *query* as a float, or raise ValueError."""
    match = _DOLLAR_AMOUNT_RE.search(query)
    if match is None:
        # No "$" marker: take the first number that is not the age itself.
        age_span = age_match.span(1) if age_match else None
        match = next(
            (m for m in _PLAIN_NUMBER_RE.finditer(query) if m.span(1) != age_span),
            None,
        )
    if match is None:
        raise ValueError("no transaction amount found in query")
    return float(match.group(1).replace(',', ''))


def process_nl_query(query):
    """Parse a free-text transaction description and score it.

    Extracts amount, transaction type, city, age and income level from
    *query* and passes them to ``predict_fraud``.

    Args:
        query: Free-text description, e.g.
            "Credit of $6000 in New York for a 26-year-old with low income".

    Returns:
        The string returned by ``predict_fraud``, or a string starting with
        ``"Error processing query: "`` if the query could not be parsed.
    """
    try:
        if not query or not query.strip():
            raise ValueError("query is empty")

        age_match = _AGE_RE.search(query)
        amount = _extract_amount(query, age_match)
        trans_type = 'Credit' if 'credit' in query.lower() else 'Debit'
        # Fuzzy match: always yields the closest known city, even when the
        # query does not name one.
        city = process.extractOne(query, le_city.classes_)[0]
        # Fall back to the dataset's median age when none is mentioned.
        age = int(age_match.group(1)) if age_match else data['Age'].median()

        if _LOW_INCOME_RE.search(query):
            income = 'Low'
        elif _HIGH_INCOME_RE.search(query):
            income = 'High'
        else:
            income = 'Medium'

        return predict_fraud(amount, trans_type, city, age, income)
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"Error processing query: {str(e)}"

# Visualization function
def _show_no_data(ax, title):
    """Draw the placeholder shown when there are no fraud rows to plot."""
    ax.text(0.5, 0.5, 'No fraud data available',
            ha='center', va='center')
    ax.set_title(title)


def _plot_fraud_counts(ax, column, title, xlabel, rotate_labels=False):
    """Draw a bar chart of fraud-case counts grouped by *column*."""
    counts = data.loc[data['Fraud'] == 1, column].value_counts()
    if counts.empty:
        _show_no_data(ax, title)
        return
    ax.bar(counts.index, counts.values)
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    if rotate_labels:
        ax.tick_params(axis='x', labelrotation=45)


def _plot_transaction_patterns(ax):
    """Draw an amount-vs-age scatter of legitimate and fraudulent rows."""
    title = 'Transaction Amount vs Age'
    fraud_data = data[data['Fraud'] == 1]
    legit_data = data[data['Fraud'] == 0]
    if fraud_data.empty:
        _show_no_data(ax, title)
        return
    ax.scatter(legit_data['Amount'], legit_data['Age'],
               alpha=0.3, label='Legitimate')
    ax.scatter(fraud_data['Amount'], fraud_data['Age'],
               color='red', alpha=0.5, label='Fraud')
    ax.set_title(title)
    ax.set_xlabel('Amount')
    ax.set_ylabel('Age')
    ax.legend()


def create_plot(choice):
    """Build the figure for the selected visualization.

    Args:
        choice: One of "Fraud by City", "Fraud by Income" or
            "Transaction Patterns".

    Returns:
        A ``(figure, "")`` tuple on success, or
        ``(None, "Error generating plot: ...")`` on failure, including when
        *choice* is not a known visualization.
    """
    # Build the figure directly instead of through pyplot: pyplot keeps a
    # global registry of every figure it creates, so a long-running app that
    # never closes them leaks one figure per request, and its "current
    # figure" helpers are shared between concurrent requests.
    from matplotlib.figure import Figure

    try:
        fig = Figure(figsize=(10, 6))
        ax = fig.subplots()

        if choice == "Fraud by City":
            _plot_fraud_counts(ax, 'City', 'Fraud Cases by City', 'City',
                               rotate_labels=True)
        elif choice == "Fraud by Income":
            _plot_fraud_counts(ax, 'Income', 'Fraud Cases by Income Level',
                               'Income Level')
        elif choice == "Transaction Patterns":
            _plot_transaction_patterns(ax)
        else:
            raise ValueError(f"unknown visualization {choice!r}")

        fig.tight_layout()
        return fig, ""

    except Exception as e:
        # Nothing to close: the figure was never registered with pyplot.
        return None, f"Error generating plot: {str(e)}"

# Gradio Interface
def _render_plot(choice):
    """Run ``create_plot`` and reveal the error box only when it reports an error."""
    fig, error = create_plot(choice)
    return fig, gr.update(value=error, visible=bool(error))


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🕵️ Banking Fraud Detection System")

    # Tab 1: free-text description parsed by process_nl_query
    with gr.Tab("💬 NLP Query"):
        gr.Markdown("## Analyze Transactions with Natural Language")
        with gr.Row():
            nl_input = gr.Textbox(label="Describe transaction", placeholder="e.g., 'Credit of $6000 in New York for a 26-year-old with low income'")
            nl_btn = gr.Button("Analyze", variant="primary")
        nl_output = gr.Textbox(label="Analysis Result", lines=4)
        gr.Examples(
            examples=[
                "$8000 credit in Chicago for 45-year-old with medium income",
                "Verify $300 debit in Phoenix for 60-year-old high income client"
            ],
            inputs=nl_input
        )
        nl_btn.click(process_nl_query, nl_input, nl_output)

    # Tab 2: structured form passed straight to predict_fraud
    with gr.Tab("📝 Manual Input"):
        gr.Markdown("## Manual Transaction Analysis")
        with gr.Row():
            amount = gr.Number(label="Amount", minimum=0)
            trans_type = gr.Dropdown(["Credit", "Debit"], label="Type")
        with gr.Row():
            # Choices come from the fitted encoders so the form only offers
            # labels the model was trained on.
            city = gr.Dropdown(le_city.classes_.tolist(), label="City")
            age = gr.Number(label="Age", minimum=18)
        income = gr.Dropdown(le_income.classes_.tolist(), label="Income Level")
        manual_btn = gr.Button("Analyze", variant="primary")
        manual_output = gr.Textbox(label="Analysis Result", lines=4)
        manual_btn.click(predict_fraud, [amount, trans_type, city, age, income], manual_output)

    # Tab 3: charts over the labelled dataset
    with gr.Tab("📊 Data Insights"):
        gr.Markdown("## Fraud Pattern Visualization")
        with gr.Row():
            plot_choice = gr.Radio(
                ["Fraud by City", "Fraud by Income", "Transaction Patterns"],
                label="Select Visualization",
                value="Fraud by City"
            )
        with gr.Row():
            plot_output = gr.Plot()
            # Hidden while empty; _render_plot makes it visible when
            # create_plot returns an error message.
            error_output = gr.Textbox(label="Error Message", visible=False)

        plot_choice.change(
            fn=_render_plot,
            inputs=plot_choice,
            outputs=[plot_output, error_output]
        )
        # `.change` does not fire for the initial value, so also draw the
        # default selection when the page loads.
        demo.load(
            fn=_render_plot,
            inputs=plot_choice,
            outputs=[plot_output, error_output]
        )

    # Tab 4: the full dataset, including the encoded columns
    with gr.Tab("📁 Raw Data"):
        gr.Markdown("## Complete Transaction Dataset")
        gr.DataFrame(data)

demo.launch()