Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,7 @@ model = joblib.load("model.joblib")
|
|
19 |
class EmailInput(BaseModel):
|
20 |
input_email_body: str
|
21 |
|
22 |
-
# PII Masking Function
|
23 |
def mask_and_store_all_pii(text):
|
24 |
text = str(text)
|
25 |
pii_map = {}
|
@@ -27,20 +27,22 @@ def mask_and_store_all_pii(text):
|
|
27 |
|
28 |
patterns = {
|
29 |
"email": r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b",
|
30 |
-
"phone_number": r"\
|
31 |
-
"dob": r"\b\d{2}[
|
32 |
-
"aadhar_num": r"\b\d{4}[-
|
33 |
-
"credit_debit_no": r"\b(?:\d[ -]*?){13,
|
34 |
-
"cvv_no": r"\b\d{3}\b",
|
35 |
-
"expiry_no": r"\b(0[1-9]|1[0-2])
|
36 |
-
|
37 |
}
|
38 |
|
39 |
for label, pattern in patterns.items():
|
40 |
for match in re.finditer(pattern, text):
|
41 |
original = match.group()
|
42 |
start, end = match.start(), match.end()
|
43 |
-
placeholder = f"[{label}_{len(pii_map)}]"
|
|
|
|
|
44 |
pii_map[placeholder] = original
|
45 |
entity_list.append({
|
46 |
"position": [start, end],
|
@@ -81,4 +83,3 @@ def classify_email(data: EmailInput):
|
|
81 |
@app.get("/")
|
82 |
def root():
|
83 |
return {"message": "Email Classification API is running."}
|
84 |
-
|
|
|
19 |
class EmailInput(BaseModel):
|
20 |
input_email_body: str
|
21 |
|
22 |
+
# Updated PII Masking Function (fixes Aadhaar vs Card and name misclassifications)
|
23 |
def mask_and_store_all_pii(text):
|
24 |
text = str(text)
|
25 |
pii_map = {}
|
|
|
27 |
|
28 |
patterns = {
|
29 |
"email": r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b",
|
30 |
+
"phone_number": r"\+?\d[\d\s\-]{7,14}\d",
|
31 |
+
"dob": r"\b\d{1,2}[\/\-\.\s]\d{1,2}[\/\-\.\s]\d{2,4}\b",
|
32 |
+
"aadhar_num": r"\b\d{4}[ -]?\d{4}[ -]?\d{4}\b(?!\d)",
|
33 |
+
"credit_debit_no": r"\b(?:\d[ -]*?){13,19}\b",
|
34 |
+
"cvv_no": r"(?i)\b(?:cvv[:\s\-]*)?(\d{3,4})\b",
|
35 |
+
"expiry_no": r"\b(0[1-9]|1[0-2])[\/\-]\d{2,4}\b",
|
36 |
+
# Avoid naive full name pattern to prevent false positives like 'Dear Sir'
|
37 |
}
|
38 |
|
39 |
for label, pattern in patterns.items():
|
40 |
for match in re.finditer(pattern, text):
|
41 |
original = match.group()
|
42 |
start, end = match.start(), match.end()
|
43 |
+
placeholder = f"[{label}_{len(pii_map):03d}]"
|
44 |
+
if original not in text:
|
45 |
+
continue
|
46 |
pii_map[placeholder] = original
|
47 |
entity_list.append({
|
48 |
"position": [start, end],
|
|
|
83 |
@app.get("/")
|
84 |
def root():
|
85 |
return {"message": "Email Classification API is running."}
|
|