sanabanu31 committed on
Commit
9b81b0a
·
verified ·
1 Parent(s): 25952c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -32
app.py CHANGED
@@ -3,8 +3,6 @@ from pydantic import BaseModel
3
  import joblib
4
  import pandas as pd
5
  import re
6
- from sklearn.feature_extraction.text import TfidfVectorizer
7
- from sklearn.svm import LinearSVC
8
  from transformers import pipeline
9
 
10
  # Initialize FastAPI app
@@ -16,9 +14,8 @@ app = FastAPI(
16
  redoc_url="/redoc"
17
  )
18
 
19
- # Load model and vectorizer
20
  model = joblib.load("model.joblib")
21
- vectorizer = joblib.load("vectorizer.joblib")
22
 
23
  # Initialize NER pipeline
24
  ner = pipeline('ner', model='Davlan/xlm-roberta-base-ner-hrl', grouped_entities=True)
@@ -114,8 +111,7 @@ def restore_pii(masked_text, pii_map):
114
  def classify_email(data: EmailInput):
115
  raw_text = data.input_email_body
116
  masked_text, pii_map, entity_list = mask_and_store_all_pii(raw_text)
117
- features = vectorizer.transform([masked_text])
118
- predicted_category = model.predict(features)[0]
119
  return {
120
  "input_email_body": raw_text,
121
  "list_of_masked_entities": entity_list,
@@ -123,32 +119,6 @@ def classify_email(data: EmailInput):
123
  "category_of_the_email": predicted_category
124
  }
125
 
126
- # Retraining endpoint
127
- @app.post("/train")
128
- def train_model(new_example: TrainingExample):
129
- df = pd.DataFrame([{"email_body": new_example.email_body, "label": new_example.label}])
130
- try:
131
- df.to_csv("training_data.csv", mode='a', header=not pd.io.common.file_exists("training_data.csv"), index=False)
132
- except Exception as e:
133
- return {"error": f"Failed to append to dataset: {str(e)}"}
134
-
135
- # Load dataset
136
- full_df = pd.read_csv("training_data.csv")
137
- full_df['masked_text'] = full_df['email_body'].apply(lambda x: mask_and_store_all_pii(x)[0])
138
-
139
- # Vectorize and train
140
- new_vectorizer = TfidfVectorizer()
141
- X = new_vectorizer.fit_transform(full_df['masked_text'])
142
- y = full_df['label']
143
- new_model = LinearSVC()
144
- new_model.fit(X, y)
145
-
146
- # Save updated model and vectorizer
147
- joblib.dump(new_model, "model.joblib")
148
- joblib.dump(new_vectorizer, "vectorizer.joblib")
149
-
150
- return {"message": "Model retrained successfully with new example."}
151
-
152
  # Health check
153
  @app.get("/")
154
  def root():
 
3
  import joblib
4
  import pandas as pd
5
  import re
 
 
6
  from transformers import pipeline
7
 
8
  # Initialize FastAPI app
 
14
  redoc_url="/redoc"
15
  )
16
 
17
+ # Load the combined model pipeline (includes vectorizer)
18
  model = joblib.load("model.joblib")
 
19
 
20
  # Initialize NER pipeline
21
  ner = pipeline('ner', model='Davlan/xlm-roberta-base-ner-hrl', grouped_entities=True)
 
111
  def classify_email(data: EmailInput):
112
  raw_text = data.input_email_body
113
  masked_text, pii_map, entity_list = mask_and_store_all_pii(raw_text)
114
+ predicted_category = model.predict([masked_text])[0]
 
115
  return {
116
  "input_email_body": raw_text,
117
  "list_of_masked_entities": entity_list,
 
119
  "category_of_the_email": predicted_category
120
  }
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  # Health check
123
  @app.get("/")
124
  def root():