sanabanu31 committed on
Commit
bf70aa2
·
verified ·
1 Parent(s): b41db50

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -0
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ import joblib
4
+ import re
5
+
6
# Deserialize the model a single time at import so that every request
# reuses the same in-memory object.
model = joblib.load("model.joblib")

# Application object that the route decorators in this module register on.
app = FastAPI(title="Email Classification API")
10
+
11
@app.get("/")
def root():
    """Handle GET ``/`` by returning a fixed "service is running" message."""
    status = {"message": "Email Classification API is running."}
    return status
14
+
15
+
16
+ # Input data schema
17
class EmailInput(BaseModel):
    """Request payload describing one email to classify."""

    # Subject line; may be omitted, in which case it is the empty string.
    subject: str = ""
    # Email text; required.
    email: str
20
+
21
+ # PII masking function (same as your training)
22
# Label -> regex table. Patterns are applied in this order, so text consumed
# by an earlier label is no longer visible to the later ones.
# NOTE(review): "aadhar_num" runs before "credit_debit_no", so the first
# twelve digits of a card number written in groups of four are labelled as
# aadhar_num. The order is left untouched because the masking is presumably
# meant to mirror what the model saw in training -- confirm before reordering.
_PII_PATTERNS = {
    "email": r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b",
    "phone_number": r"\b\d{10}\b",
    "dob": r"\b\d{2}[/-]\d{2}[/-]\d{4}\b",
    "aadhar_num": r"\b\d{4}[- ]?\d{4}[- ]?\d{4}\b",
    "credit_debit_no": r"\b(?:\d[ -]*?){13,16}\b",
    "cvv_no": r"\b\d{3}\b",
    "expiry_no": r"\b(0[1-9]|1[0-2])\/\d{2,4}\b",
    "full_name": r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+)+)\b",
}


def _mask_label(text, label, pattern, pii_map):
    """Replace each match of *pattern* in *text* with a ``[label_i]`` token.

    The full matched string is recorded in *pii_map* under its placeholder.
    A value that occurs more than once reuses the placeholder it was first
    given, so indices count distinct values per label.
    """
    placeholder_for = {}

    def _substitute(match):
        # group(0) is always the whole match, regardless of any capture
        # groups inside the pattern.
        value = match.group(0)
        placeholder = placeholder_for.get(value)
        if placeholder is None:
            placeholder = f"[{label}_{len(placeholder_for)}]"
            placeholder_for[value] = placeholder
            pii_map[placeholder] = value
        return placeholder

    # re.sub only rewrites the spans the pattern actually matched, so equal
    # substrings embedded in other tokens (e.g. "123" inside "91234") are
    # left alone.
    return re.sub(pattern, _substitute, text)


def mask_and_store_all_pii(text):
    """Mask PII in *text* and return the masked text with a restore map.

    Args:
        text: Input to scan; it is converted with ``str()`` first, so
            non-string values are accepted.

    Returns:
        A ``(masked_text, pii_map)`` tuple. ``masked_text`` has each detected
        item replaced by a ``[<label>_<index>]`` placeholder, and ``pii_map``
        maps every placeholder to the original string it replaced.
    """
    text = str(text)
    pii_map = {}

    for label, pattern in _PII_PATTERNS.items():
        text = _mask_label(text, label, pattern, pii_map)

    return text, pii_map
45
+
46
+ # Restore PII function
47
def restore_pii(masked_text, pii_map):
    """Substitute each placeholder in *masked_text* with its original value.

    Args:
        masked_text: Text containing placeholder tokens.
        pii_map: Mapping of placeholder token to the string it stands for.

    Returns:
        The text with every occurrence of every placeholder replaced.
        Replacements are applied one placeholder at a time, in the map's
        iteration order.
    """
    restored = masked_text
    for token in pii_map:
        restored = restored.replace(token, pii_map[token])
    return restored
51
+
52
@app.post("/classify")
def classify_email(data: EmailInput):
    """Handle POST ``/classify``: mask PII, then classify the masked text.

    Args:
        data: Request payload carrying ``subject`` and ``email``.

    Returns:
        A dict with the model's prediction under ``predicted_category``, the
        masked text under ``masked_text`` and the placeholder-to-original
        mapping under ``pii_map``.
    """
    # Subject and body go to the model as one space-separated string.
    combined = "{} {}".format(data.subject, data.email)

    # Mask PII before the text is handed to the model.
    masked_text, pii_map = mask_and_store_all_pii(combined)

    # The model is called with a one-item batch; only its first result is used.
    predictions = model.predict([masked_text])

    response = {
        "predicted_category": predictions[0],
        "masked_text": masked_text,
        "pii_map": pii_map,
    }
    return response