Spaces:

AITestingWorkSpace
/

FraudNLP

Paused

App Files Files Community

FraudNLP / app copy.py

vishalsh13

commit update

7e976f4 7 months ago

raw

history blame contribute delete

5.46 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import re
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.preprocessing import LabelEncoder
	from fuzzywuzzy import process

	# Enhanced data generation with realistic fraud patterns
	def load_data(n_rows: int = 1000, seed: int = 42) -> pd.DataFrame:
	    """Generate a synthetic transaction table with rule-based fraud labels.

	    Args:
	        n_rows: Number of transactions to generate (default 1000).
	        seed: Seed passed to ``np.random.seed`` so the table is reproducible.
	            Note that this reseeds NumPy's global random state.

	    Returns:
	        A DataFrame with columns ``TransactionID``, ``Amount``, ``Type``,
	        ``City``, ``Age``, ``Income`` and ``Fraud`` (1 when any of the three
	        fraud rules below matches, otherwise 0).
	    """
	    np.random.seed(seed)
	    cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
	    incomes = ['Low', 'Medium', 'High']

	    # The columns are drawn in this exact order; reordering them would change
	    # which values each column receives from the seeded random stream.
	    data = pd.DataFrame({
	        'TransactionID': range(1, n_rows + 1),
	        'Amount': np.random.uniform(10, 15000, n_rows).round(2),
	        'Type': np.random.choice(['Credit', 'Debit'], n_rows),
	        'City': np.random.choice(cities, n_rows),
	        'Age': np.random.randint(18, 70, n_rows),
	        'Income': np.random.choice(incomes, n_rows, p=[0.4, 0.4, 0.2]),
	    })

	    # Fraud rules: a row is flagged when at least one of them holds.
	    low_income_big_spend = (data['Amount'] > 5000) & (data['Income'] == 'Low')
	    large_credit = (data['Type'] == 'Credit') & (data['Amount'] > 8000)
	    young_new_yorker = (
	        (data['City'] == 'New York')
	        & data['Age'].between(20, 35)
	        & (data['Amount'] > 6000)
	    )

	    data['Fraud'] = 0
	    data.loc[low_income_big_spend | large_credit | young_new_yorker, 'Fraud'] = 1

	    return data

	# Synthetic transactions; used for training and for the insights view.
	data = load_data()

	# One encoder per categorical column, so each keeps its own label vocabulary
	# and can be reused later to encode values parsed from a query.
	le_type = LabelEncoder()
	le_city = LabelEncoder()
	le_income = LabelEncoder()

	# The encoders are fitted on the whole table (a real pipeline would fit them
	# on the training split only) and the integer codes are stored alongside the
	# original columns.
	data = data.assign(
	    Type_encoded=le_type.fit_transform(data['Type']),
	    City_encoded=le_city.fit_transform(data['City']),
	    Income_encoded=le_income.fit_transform(data['Income']),
	)

	# Feature matrix and target for the classifier.
	features = ['Amount', 'Type_encoded', 'City_encoded', 'Age', 'Income_encoded']
	X, y = data[features], data['Fraud']

	# Random forest with a fixed seed so repeated runs give the same model.
	model = RandomForestClassifier(n_estimators=100, random_state=42)
	model.fit(X, y)

	# A number with optional thousands separators and up to two decimals.
	_NUMBER_PATTERN = r'\d+(?:,\d{3})*(?:\.\d{1,2})?'
	# A dollar-prefixed number is the most reliable signal for the amount.
	_DOLLAR_AMOUNT_RE = re.compile(r'\$\s?(' + _NUMBER_PATTERN + r')')
	# Any stand-alone number; used as a fallback when no "$" is present.
	_PLAIN_NUMBER_RE = re.compile(r'(?<![\d.])(' + _NUMBER_PATTERN + r')')
	# "26-year-old", "45 years old", "60 yrs": the unit is mandatory so that an
	# amount such as "6000 credit" is never mistaken for an age.
	_AGE_UNIT_RE = re.compile(r'(?<!\d)(\d{1,3})[\s-]*(?:years?|yrs?)\b', re.IGNORECASE)
	# "age 30", "aged 30", "age: 30".
	_AGE_PREFIX_RE = re.compile(r'\baged?\s*:?\s*(\d{1,3})\b', re.IGNORECASE)
	_KNOWN_CITIES = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']


	def _extract_age(query):
	    """Return ``(age, span_of_digits)`` or ``(None, None)`` if no age is stated."""
	    for pattern in (_AGE_UNIT_RE, _AGE_PREFIX_RE):
	        match = pattern.search(query)
	        if match:
	            return int(match.group(1)), match.span(1)
	    return None, None


	def _extract_amount(query, age_span):
	    """Return the transaction amount as a float, or ``None`` if none is found.

	    A "$"-prefixed number wins; otherwise the first number that is not the
	    age (identified by ``age_span``) is used.
	    """
	    match = _DOLLAR_AMOUNT_RE.search(query)
	    if match is None:
	        for candidate in _PLAIN_NUMBER_RE.finditer(query):
	            overlaps_age = (
	                age_span is not None
	                and candidate.start(1) < age_span[1]
	                and age_span[0] < candidate.end(1)
	            )
	            if not overlaps_age:
	                match = candidate
	                break
	    if match is None:
	        return None
	    return float(match.group(1).replace(',', ''))


	def _extract_city(query):
	    """Return the known city mentioned in the query, or ``'Unknown'``."""
	    lowered = query.lower()
	    # An exact mention needs no fuzzy scoring.
	    for name in _KNOWN_CITIES:
	        if name.lower() in lowered:
	            return name
	    # Otherwise tolerate typos via fuzzy matching, with the original threshold.
	    best = process.extractOne(query, _KNOWN_CITIES)
	    if best and best[1] > 70:
	        return best[0]
	    return 'Unknown'


	def _extract_income(lowered_query):
	    """Return ``'Low'``, ``'High'`` or ``'Medium'`` (the default) from whole words."""
	    # Whole-word matching avoids false hits such as "follow" or "below".
	    if re.search(r'\blow(?:er)?\b', lowered_query):
	        return 'Low'
	    if re.search(r'\bhigh(?:er)?\b', lowered_query):
	        return 'High'
	    return 'Medium'


	def process_nl_query(query):
	    """Parse a free-text transaction description and return a fraud report.

	    The query is scanned for an amount, a transaction type (credit/debit),
	    a city, an age and an income level. The parsed values are encoded with
	    the module-level label encoders and scored by the module-level ``model``.

	    Args:
	        query: Free-text description, e.g.
	            "Check a $6000 credit in New York for a 26-year-old with low income".

	    Returns:
	        A multi-line report string, or a string starting with "Error" when
	        the amount cannot be found or processing fails.
	    """
	    try:
	        query = query or ''
	        lowered = query.lower()

	        # The age is located first so its digits can be excluded when the
	        # amount has to be guessed from a bare number.
	        age, age_span = _extract_age(query)

	        amount = _extract_amount(query, age_span)
	        if amount is None:
	            return "Error: Could not extract transaction amount."

	        # Anything that does not mention "credit" is treated as a debit.
	        trans_type = 'Credit' if 'credit' in lowered else 'Debit'
	        city = _extract_city(query)
	        income = _extract_income(lowered)

	        # Labels the encoders have never seen are mapped to -1.
	        city_encoded = le_city.transform([city])[0] if city in le_city.classes_ else -1
	        income_encoded = le_income.transform([income])[0] if income in le_income.classes_ else -1

	        # When no age is given, the model is fed the dataset median instead.
	        model_age = age if age is not None else data['Age'].median()

	        # Column names and order match the features the model was trained on.
	        input_df = pd.DataFrame({
	            'Amount': [amount],
	            'Type_encoded': [le_type.transform([trans_type])[0]],
	            'City_encoded': [city_encoded],
	            'Age': [model_age],
	            'Income_encoded': [income_encoded],
	        })

	        # Column 1 of predict_proba is read as the probability of Fraud == 1.
	        proba = model.predict_proba(input_df)[0][1]
	        prediction = model.predict(input_df)[0]

	        # Human-readable reasons mirroring the rules used to label the data.
	        explanation = []
	        if amount > 5000 and income == 'Low':
	            explanation.append("High amount for low income")
	        if amount > 8000 and trans_type == 'Credit':
	            explanation.append("Unusually large credit transaction")
	        # The age rule only applies when an age was actually stated.
	        if city == 'New York' and age is not None and 20 <= age <= 35 and amount > 6000:
	            explanation.append("Suspicious pattern for young adults in NYC")

	        age_text = age if age is not None else 'Unknown'
	        risk_text = ', '.join(explanation) if explanation else 'No specific risk factors identified'

	        return (
	            f"Transaction Details:\n"
	            f"- Amount: ${amount:,.2f}\n"
	            f"- Type: {trans_type}\n"
	            f"- City: {city}\n"
	            f"- Age: {age_text}\n"
	            f"- Income Level: {income}\n\n"
	            f"Fraud Analysis:\n"
	            f"- Prediction: {'Potentially Fraudulent' if prediction else 'Likely Legitimate'}\n"
	            # Labelled as what it is: the fraud-class probability, not the
	            # confidence in whichever verdict is shown above.
	            f"- Fraud Probability: {proba*100:.1f}%\n"
	            f"- Risk Factors: {risk_text}"
	        )

	    except Exception as e:
	        # UI boundary: report the failure in the output box instead of crashing.
	        return f"Error processing query: {str(e)}"

	# Gradio Interface
	with gr.Blocks() as demo:
	    gr.Markdown("## Enhanced Fraud Detection System")

	    # Tab 1: free-text query in, fraud report out.
	    with gr.Tab("Natural Language Query"):
	        gr.Markdown("Example: 'Check a $6000 credit in New York for a 26-year-old with low income'")
	        nl_input = gr.Textbox(label="Enter your transaction query:")
	        nl_output = gr.Textbox(label="Fraud Analysis", lines=10)
	        gr.Examples(
	            examples=[
	                "Is a $8000 credit in Chicago for a 45-year-old medium income safe?",
	                "Verify a $300 debit in Phoenix for a 60-year-old high income client"
	            ],
	            inputs=nl_input
	        )
	        # Pressing Enter in the textbox runs the analysis.
	        nl_input.submit(fn=process_nl_query, inputs=nl_input, outputs=nl_output)

	    # Tab 2: summary statistics of the rows labelled as fraud.
	    with gr.Tab("Data Insights"):
	        gr.Markdown("### Fraud Pattern Analysis")
	        # describe() keeps the statistic names (count, mean, ...) in the index;
	        # they are moved into a regular column so the table shows row labels.
	        fraud_summary = data[data['Fraud'] == 1].describe()
	        gr.DataFrame(fraud_summary.reset_index().rename(columns={'index': 'Statistic'}))

	demo.launch()