# Spaces: Sleeping (page-header residue from the listing this file was copied from; not part of the program)
import re

import joblib
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# Load the model once, at import time, so every request reuses the same object.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- "model.joblib" must come from a trusted source.
model = joblib.load("model.joblib")

# ASGI application object.
app = FastAPI(title="Email Classification API")
# NOTE(review): no route decorator is visible on this handler, so as written
# it is never registered on `app`. Presumably it is meant to be the GET "/"
# health check -- confirm against the deployed file.
def root() -> dict:
    """Return a static payload saying the API is running."""
    return {"message": "Email Classification API is running."}
# Input data schema: the fields read by classify_email.
# (Documented with comments rather than a docstring so the generated schema
# description is left untouched.)
class EmailInput(BaseModel):
    # Subject line; optional, defaults to the empty string.
    subject: str = ""
    # Email body text; required (no default).
    email: str
# PII masking function (same as your training)
def mask_and_store_all_pii(text):
    """Replace PII-looking spans in *text* with indexed placeholders.

    The patterns are applied one after another, in the order listed below,
    each on the output of the previous one. Every match of a pattern is
    replaced by ``[<label>_<i>]``, where ``i`` is the position of that match
    among the pattern's matches, and the matched text is stored under the
    placeholder.

    Only the matched spans are rewritten: text that merely contains the same
    characters elsewhere (for example ``abc123def`` next to a standalone
    ``123``) is left alone, and the whole match is stored even when the
    pattern contains a capture group.

    Args:
        text: Value to mask; it is converted with ``str()`` first.

    Returns:
        A ``(masked_text, pii_map)`` tuple. ``pii_map`` maps each placeholder
        to the exact text it replaced.
    """
    text = str(text)
    pii_map = {}
    # Order matters: earlier patterns consume text before later ones run.
    patterns = {
        "email": r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b",
        "phone_number": r"\b\d{10}\b",
        "dob": r"\b\d{2}[/-]\d{2}[/-]\d{4}\b",
        "aadhar_num": r"\b\d{4}[- ]?\d{4}[- ]?\d{4}\b",
        "credit_debit_no": r"\b(?:\d[ -]*?){13,16}\b",
        "cvv_no": r"\b\d{3}\b",
        "expiry_no": r"\b(0[1-9]|1[0-2])\/\d{2,4}\b",
        "full_name": r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b",
    }
    for label, pattern in patterns.items():
        pieces = []
        cursor = 0
        for i, match in enumerate(re.finditer(pattern, text)):
            placeholder = f"[{label}_{i}]"
            # group(0) is the full match. re.findall would hand back only the
            # capture group (just the month for "expiry_no").
            pii_map[placeholder] = match.group(0)
            # Splice by position instead of str.replace, so identical
            # characters outside the match are never rewritten.
            pieces.append(text[cursor:match.start()])
            pieces.append(placeholder)
            cursor = match.end()
        pieces.append(text[cursor:])
        text = "".join(pieces)
    return text, pii_map
# Restore PII function
def restore_pii(masked_text, pii_map):
    """Substitute every placeholder in *masked_text* with its original text.

    All placeholders are replaced in a single pass over the input, so text
    that has just been restored is never scanned again. A restored value that
    happens to look like another placeholder is left as it is.

    Args:
        masked_text: Text containing placeholders.
        pii_map: Mapping of placeholder -> original text.

    Returns:
        The text with each known placeholder replaced. The input is returned
        unchanged when *pii_map* has no usable (non-empty) placeholder.
    """
    # Longest first, so a placeholder that is a prefix of another can never
    # shadow it in the alternation.
    placeholders = sorted((key for key in pii_map if key), key=len, reverse=True)
    if not placeholders:
        return masked_text
    alternation = "|".join(re.escape(key) for key in placeholders)
    # A callable replacement is inserted literally (no backslash processing).
    return re.sub(alternation, lambda m: pii_map[m.group(0)], masked_text)
# NOTE(review): no route decorator is visible on this handler, so as written
# it is never registered on `app`. Presumably it is meant to be a POST
# endpoint -- confirm the path against the deployed file.
def classify_email(data: EmailInput):
    """Mask PII in an email and predict its category.

    Args:
        data: Request payload; ``subject`` and ``email`` are joined with a
            single space before masking.

    Returns:
        A dict with the predicted category, the masked text that was fed to
        the model, and the placeholder -> original-text map.
    """
    # Combine subject + email text
    raw_text = f"{data.subject} {data.email}"

    # Mask PII before the text reaches the model
    masked_text, pii_map = mask_and_store_all_pii(raw_text)

    # Predict class: the model gets a one-element batch and we take its
    # single result.
    prediction = model.predict([masked_text])[0]
    # NumPy scalars (e.g. numpy.int64) are not JSON-serialisable; .item()
    # converts them to the matching plain Python value. Plain Python labels
    # have no .item() and pass through unchanged.
    if hasattr(prediction, "item"):
        prediction = prediction.item()

    # Return prediction and masked email
    return {
        "predicted_category": prediction,
        "masked_text": masked_text,
        "pii_map": pii_map,
    }