"""FastAPI service that serves TF-IDF + XGBoost predictions for transaction records."""

import logging
import os
from typing import Optional

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel


# ASGI application object; the route handlers in this module register on it.
app = FastAPI()

# Locations of the serialized artifacts. The paths are relative, so they are
# resolved against the process's current working directory.
TFIDF_VECTORIZER_PATH = "models/tfidf_vectorizer.pkl"
MODELS_PATH = "models/xgb_models.pkl"
LABEL_ENCODERS_PATH = "models/label_encoders.pkl"

# Load every artifact once, at import time, so request handlers can reuse them.
# A missing or unreadable file makes the import itself fail.
# NOTE(review): joblib.load unpickles its input and can execute arbitrary code;
# only point these paths at trusted files.
tfidf_vectorizer, models, label_encoders = (
    joblib.load(artifact_path)
    for artifact_path in (
        TFIDF_VECTORIZER_PATH,
        MODELS_PATH,
        LABEL_ENCODERS_PATH,
    )
)


class TransactionData(BaseModel):
    """Request schema describing a single transaction record.

    Every field is required except ``Payment_Sender_Name`` and
    ``Payment_Reciever_Name``, which default to an empty string when omitted.
    Field names are the keys clients send, so they must not be renamed
    (including the existing ``Reciever`` spelling).

    Only a subset of these fields is rendered into the text that ``predict``
    feeds to the vectorizer; the remaining ones are validated but not
    otherwise read in this module.
    """

    Transaction_Id: str
    Hit_Seq: int
    Hit_Id_List: str
    Origin: str
    Designation: str
    Keywords: str
    Name: str
    SWIFT_Tag: str
    Currency: str
    Entity: str
    Message: str
    City: str
    Country: str
    State: str
    Hit_Type: str
    Record_Matching_String: str
    WatchList_Match_String: str

    # The only optional fields: omitted values fall back to "".
    Payment_Sender_Name: Optional[str] = ""
    Payment_Reciever_Name: Optional[str] = ""

    Swift_Message_Type: str
    Text_Sanction_Data: str
    Matched_Sanctioned_Entity: str
    Is_Match: int
    Red_Flag_Reason: str
    Risk_Level: str
    Risk_Score: float
    Risk_Score_Description: str
    CDD_Level: str
    PEP_Status: str

    # Dates and timestamps are accepted as plain strings; no format is enforced.
    Value_Date: str
    Last_Review_Date: str
    Next_Review_Date: str

    Sanction_Description: str
    Checker_Notes: str
    Sanction_Context: str
    Maker_Action: str
    Customer_ID: int
    Customer_Type: str
    Industry: str
    Transaction_Date_Time: str
    Transaction_Type: str
    Transaction_Channel: str
    Originating_Bank: str
    Beneficiary_Bank: str
    Geographic_Origin: str
    Geographic_Destination: str
    Match_Score: float
    Match_Type: str
    Sanctions_List_Version: str
    Screening_Date_Time: str
    Risk_Category: str
    Risk_Drivers: str
    Alert_Status: str
    Investigation_Outcome: str
    Case_Owner_Analyst: str
    Escalation_Level: str
    Escalation_Date: str
    Regulatory_Reporting_Flags: bool
    Audit_Trail_Timestamp: str
    Source_Of_Funds: str
    Purpose_Of_Transaction: str
    Beneficial_Owner: str
    Sanctions_Exposure_History: bool


class PredictionRequest(BaseModel):
    """Body of a ``POST /predict`` call: one transaction wrapped under a key."""

    # The record to score, nested under the "transaction_data" key.
    transaction_data: TransactionData


@app.get("/")
async def root():
    """Health-check endpoint: return a static payload showing the API is up."""
    health_payload = {
        "status": "healthy",
        "message": "XGBoost TF-IDF API is running",
    }
    return health_payload


def _to_native(value):
    """Unwrap a NumPy scalar into the equivalent built-in Python object.

    Any object exposing ``item()`` (NumPy scalars do) is unwrapped; everything
    else is returned unchanged.
    """
    return value.item() if hasattr(value, "item") else value


@app.post("/predict")
async def predict(request: PredictionRequest):
    """Score one transaction with every loaded model.

    Selected fields of the transaction are rendered into a single text
    document, vectorized with ``tfidf_vectorizer``, and passed to each model
    in ``models``. The matching entry of ``label_encoders`` maps the winning
    class index back to its label.

    Args:
        request: Validated request body holding the transaction to score.

    Returns:
        A dict keyed by each label in ``models``. Every value holds
        ``"prediction"`` (the decoded label of the most probable class) and
        ``"probabilities"`` (decoded class label -> probability as ``float``).

    Raises:
        HTTPException: Status 500, with the underlying error message as
            ``detail``, when any step of the prediction fails.
    """
    # NOTE(review): vectorizing and predict_proba are synchronous calls. Inside
    # an ``async def`` handler they run on the event loop and block other
    # requests meanwhile; a plain ``def`` handler would run in the threadpool.
    try:
        # Read fields straight from the validated model; there is no need to
        # round-trip a single record through a DataFrame.
        tx = request.transaction_data

        # NOTE(review): this text is handed to the vectorizer verbatim, so keep
        # the labels, their order and the layout of the template unchanged.
        text_input = f"""
        Transaction ID: {tx.Transaction_Id}
        Origin: {tx.Origin}
        Designation: {tx.Designation}
        Keywords: {tx.Keywords}
        Name: {tx.Name}
        SWIFT Tag: {tx.SWIFT_Tag}
        Currency: {tx.Currency}
        Entity: {tx.Entity}
        Message: {tx.Message}
        City: {tx.City}
        Country: {tx.Country}
        State: {tx.State}
        Hit Type: {tx.Hit_Type}
        Record Matching String: {tx.Record_Matching_String}
        WatchList Match String: {tx.WatchList_Match_String}
        Payment Sender: {tx.Payment_Sender_Name}
        Payment Receiver: {tx.Payment_Reciever_Name}
        Swift Message Type: {tx.Swift_Message_Type}
        Text Sanction Data: {tx.Text_Sanction_Data}
        Matched Sanctioned Entity: {tx.Matched_Sanctioned_Entity}
        Red Flag Reason: {tx.Red_Flag_Reason}
        Risk Level: {tx.Risk_Level}
        Risk Score: {tx.Risk_Score}
        CDD Level: {tx.CDD_Level}
        PEP Status: {tx.PEP_Status}
        Sanction Description: {tx.Sanction_Description}
        Checker Notes: {tx.Checker_Notes}
        Sanction Context: {tx.Sanction_Context}
        Maker Action: {tx.Maker_Action}
        Customer Type: {tx.Customer_Type}
        Industry: {tx.Industry}
        Transaction Type: {tx.Transaction_Type}
        Transaction Channel: {tx.Transaction_Channel}
        Geographic Origin: {tx.Geographic_Origin}
        Geographic Destination: {tx.Geographic_Destination}
        Risk Category: {tx.Risk_Category}
        Risk Drivers: {tx.Risk_Drivers}
        Alert Status: {tx.Alert_Status}
        Investigation Outcome: {tx.Investigation_Outcome}
        Source of Funds: {tx.Source_Of_Funds}
        Purpose of Transaction: {tx.Purpose_Of_Transaction}
        Beneficial Owner: {tx.Beneficial_Owner}
        """

        X_tfidf = tfidf_vectorizer.transform([text_input])

        response = {}
        for label, model in models.items():
            encoder = label_encoders[label]
            # predict_proba returns one row per input document; only one was sent.
            proba = model.predict_proba(X_tfidf)[0]
            pred = proba.argmax()

            # Convert NumPy scalars to built-ins so the response always
            # serializes to JSON, whatever dtype the encoder's classes have.
            decoded_label = _to_native(encoder.inverse_transform([pred])[0])
            class_probs = {
                _to_native(encoder.classes_[i]): float(prob)
                for i, prob in enumerate(proba)
            }

            response[label] = {
                "prediction": decoded_label,
                "probabilities": class_probs,
            }

        return response

    except Exception as e:
        # Keep the full traceback in the server log; the client only receives
        # the error message.
        logging.getLogger(__name__).exception("Prediction failed")
        raise HTTPException(status_code=500, detail=str(e)) from e


if __name__ == "__main__":
    # Run the ASGI server directly when this file is executed as a script.
    import uvicorn

    # The PORT environment variable overrides the default port 7860.
    port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)