sanabanu31 committed on
Commit
11184ec
·
verified ·
1 Parent(s): 6b21b32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -20
app.py CHANGED
@@ -3,25 +3,28 @@ from pydantic import BaseModel
3
  import joblib
4
  import re
5
 
 
6
  app = FastAPI(
7
  title="Email Classification API",
8
  version="1.0.0",
9
  description="Classifies support emails into categories and masks personal information.",
10
- docs_url="/docs", # Swagger UI enabled here
11
- redoc_url="/redoc" # Optional ReDoc UI
12
  )
13
 
14
- # Load model
15
  model = joblib.load("model.joblib")
16
 
17
- # Email input structure
18
  class EmailInput(BaseModel):
19
- subject: str = ""
20
- email: str
21
 
22
- # PII masker
23
  def mask_and_store_all_pii(text):
 
24
  pii_map = {}
 
 
25
  patterns = {
26
  "email": r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b",
27
  "phone_number": r"\b\d{10}\b",
@@ -34,27 +37,48 @@ def mask_and_store_all_pii(text):
34
  }
35
 
36
  for label, pattern in patterns.items():
37
- matches = re.findall(pattern, text)
38
- for i, match in enumerate(matches):
39
- placeholder = f"[{label}_{i}]"
40
- pii_map[placeholder] = match
41
- text = text.replace(match, placeholder)
 
 
 
 
 
 
 
 
42
 
43
- return text, pii_map
 
 
 
 
 
44
 
45
- # Main endpoint
46
  @app.post("/classify")
47
  def classify_email(data: EmailInput):
48
- raw_text = f"{data.subject} {data.email}"
49
- masked_text, pii_map = mask_and_store_all_pii(raw_text)
50
- prediction = model.predict([masked_text])[0]
 
51
 
 
 
 
 
52
  return {
53
- "masked_text": masked_text,
54
- "predicted_category": prediction,
55
- "pii_map": pii_map
 
56
  }
57
 
 
58
  @app.get("/")
59
  def root():
60
  return {"message": "Email Classification API is running."}
 
 
3
  import joblib
4
  import re
5
 
6
+ # Initialize FastAPI app
7
  app = FastAPI(
8
  title="Email Classification API",
9
  version="1.0.0",
10
  description="Classifies support emails into categories and masks personal information.",
11
+ docs_url="/docs",
12
+ redoc_url="/redoc"
13
  )
14
 
15
+ # Load pre-trained model
16
  model = joblib.load("model.joblib")
17
 
18
+ # Input schema
19
  class EmailInput(BaseModel):
20
+ input_email_body: str
 
21
 
22
+ # PII Masking Function
23
  def mask_and_store_all_pii(text):
24
+ text = str(text)
25
  pii_map = {}
26
+ entity_list = []
27
+
28
  patterns = {
29
  "email": r"\b[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+\b",
30
  "phone_number": r"\b\d{10}\b",
 
37
  }
38
 
39
  for label, pattern in patterns.items():
40
+ for match in re.finditer(pattern, text):
41
+ original = match.group()
42
+ start, end = match.start(), match.end()
43
+ placeholder = f"[{label}_{len(pii_map)}]"
44
+ pii_map[placeholder] = original
45
+ entity_list.append({
46
+ "position": [start, end],
47
+ "classification": label,
48
+ "entity": original
49
+ })
50
+ text = text.replace(original, placeholder, 1)
51
+
52
+ return text, pii_map, entity_list
53
 
54
+ # Restore PII
55
def restore_pii(masked_text, pii_map):
    """Put the original values back into a masked text.

    Args:
        masked_text: Text containing placeholder tokens (the keys of
            ``pii_map``).
        pii_map: Mapping of placeholder token -> original text.

    Returns:
        ``masked_text`` with every occurrence of every placeholder replaced
        by its original value. The input is returned as-is when ``pii_map``
        is empty.
    """
    if not pii_map:
        return masked_text

    # Longest tokens first: if one key is a prefix of another, the longer
    # key must win inside the alternation.
    tokens = sorted(pii_map, key=len, reverse=True)
    token_re = re.compile("|".join(re.escape(token) for token in tokens))

    # One pass over the text: restored values are never rescanned, so an
    # original value that looks like another placeholder is left intact
    # (chained str.replace calls would substitute it a second time).
    return token_re.sub(lambda found: pii_map[found.group()], masked_text)
60
 
61
# Classification endpoint
@app.post("/classify")
def classify_email(data: EmailInput):
    """Mask PII in the submitted email body and classify the masked text.

    The response carries the original body, the list of masked entities,
    the masked body and the predicted category.
    """
    body = data.input_email_body

    # The placeholder -> original map is not part of the response, so it is
    # discarded here.
    masked_body, _, entities = mask_and_store_all_pii(body)

    # The model is given a one-item list; the first prediction is the label.
    predictions = model.predict([masked_body])
    label = predictions[0]

    return {
        "input_email_body": body,
        "list_of_masked_entities": entities,
        "masked_email": masked_body,
        "category_of_the_email": label,
    }
79
 
80
# Health check endpoint
@app.get("/")
def root():
    """Return a fixed message indicating that the API is running."""
    status = {"message": "Email Classification API is running."}
    return status
84
+