File size: 1,914 Bytes
bf70aa2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import joblib
import re

# Load the serialized model a single time at import, so every request reuses
# the same object instead of re-reading the file. The relative path is
# resolved against the process's current working directory.
# NOTE(review): joblib.load unpickles the file, so "model.joblib" must come
# from a trusted source.
model = joblib.load("model.joblib")

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI(title="Email Classification API")

@app.get("/")
def root():
    # Fixed status payload for GET /, usable as a simple liveness probe.
    status = {"message": "Email Classification API is running."}
    return status


# Request body schema accepted by POST /classify.
class EmailInput(BaseModel):
    # Optional subject line; falls back to an empty string when omitted.
    subject: str = ""
    # Required text; classify_email joins it to the subject with a space.
    email: str

# PII detectors, applied in this order. Each hit is replaced by a
# "[<label>_<n>]" placeholder. The card pattern runs before the Aadhaar
# pattern: otherwise the first 12 digits of a spaced/hyphenated 16-digit card
# are taken as an Aadhaar number and the last 4 digits stay unmasked.
# NOTE(review): the original comment said this mirrors the training-time
# masker. The fixes here change the masked text in the previously buggy
# cases -- confirm the training pipeline masks the same way.
_PII_PATTERNS = tuple(
    (label, re.compile(pattern))
    for label, pattern in (
        ("email", r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b"),
        ("phone_number", r"\b\d{10}\b"),
        ("dob", r"\b\d{2}[/-]\d{2}[/-]\d{4}\b"),
        ("credit_debit_no", r"\b(?:\d[ -]*?){13,16}\b"),
        ("aadhar_num", r"\b\d{4}[- ]?\d{4}[- ]?\d{4}\b"),
        ("cvv_no", r"\b\d{3}\b"),
        ("expiry_no", r"\b(0[1-9]|1[0-2])\/\d{2,4}\b"),
        ("full_name", r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b"),
    )
)


def mask_and_store_all_pii(text):
    """Replace detected PII in *text* with placeholders.

    Args:
        text: Value to scan; it is converted with ``str()`` first.

    Returns:
        A ``(masked_text, pii_map)`` tuple. ``pii_map`` maps every
        placeholder (``"[<label>_<n>]"``, numbered from 0 per label in order
        of appearance) to the exact text it replaced.
    """
    text = str(text)
    pii_map = {}

    for label, pattern in _PII_PATTERNS:
        pieces = []
        cursor = 0
        for index, match in enumerate(pattern.finditer(text)):
            placeholder = f"[{label}_{index}]"
            # group(0) is the whole match; findall() would return only the
            # capture group (e.g. just the month of an expiry date).
            pii_map[placeholder] = match.group(0)
            # Splice by position so only this occurrence is replaced, rather
            # than every identical substring elsewhere in the text.
            pieces.append(text[cursor:match.start()])
            pieces.append(placeholder)
            cursor = match.end()
        pieces.append(text[cursor:])
        text = "".join(pieces)

    return text, pii_map

# Inverse of masking: put the stored originals back in place of placeholders
def restore_pii(masked_text, pii_map):
    """Return *masked_text* with each placeholder swapped for its original.

    Args:
        masked_text: Text containing placeholder tokens.
        pii_map: Mapping of placeholder token to the text it stands for;
            entries are applied one after another in the mapping's order.

    Returns:
        The text after all substitutions.
    """
    restored = masked_text
    for token in pii_map:
        restored = restored.replace(token, pii_map[token])
    return restored

@app.post("/classify")
def classify_email(data: EmailInput):
    # The subject and the email text are joined with a single space, masked
    # for PII, and only the masked text is handed to the model.
    masked_text, pii_map = mask_and_store_all_pii(f"{data.subject} {data.email}")

    # predict() is given a one-item batch, so the first result is the answer.
    predictions = model.predict([masked_text])

    # The response carries the label, the masked text that was classified,
    # and the placeholder -> original mapping.
    response = {
        "predicted_category": predictions[0],
        "masked_text": masked_text,
        "pii_map": pii_map,
    }
    return response