yamanavijayavardhan committed
Commit 8405423 · 1 Parent(s): 324bbc9

fix memory overlimit issue

all_models.py CHANGED

@@ -12,6 +12,8 @@ logger = logging.getLogger(__name__)
 class ModelSingleton:
     _instance = None
     _initialized = False
+    _models = {}
+    _reference_counts = {}
 
     def __new__(cls):
         if cls._instance is None:
@@ -29,63 +31,113 @@ class ModelSingleton:
             self.device = "cuda" if torch.cuda.is_available() else "cpu"
             logger.info(f"Using device: {self.device}")
 
-            # Sentence transformer model
-            try:
-                logger.info("Loading sentence transformer model...")
-                SENTENCE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-                self.similarity_tokenizer = AutoTokenizer.from_pretrained(
-                    SENTENCE_MODEL,
-                    cache_dir=cache_dir
-                )
-                self.similarity_model = SentenceTransformer(
-                    SENTENCE_MODEL,
-                    cache_folder=cache_dir
-                )
-                self.similarity_model.to(self.device)
-                logger.info("Sentence transformer model loaded successfully")
-            except Exception as e:
-                logger.error(f"Error loading sentence transformer model: {e}")
-                raise
-
-            # Flan-T5-xl model
-            try:
-                logger.info("Loading Flan-T5 model...")
-                FLAN_MODEL = "google/flan-t5-xl"
-                self.flan_tokenizer = AutoTokenizer.from_pretrained(
-                    FLAN_MODEL,
-                    cache_dir=cache_dir
-                )
-                self.flan_model = AutoModelForSeq2SeqLM.from_pretrained(
-                    FLAN_MODEL,
-                    cache_dir=cache_dir,
-                    torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
-                )
-                self.flan_model.to(self.device)
-                logger.info("Flan-T5 model loaded successfully")
-            except Exception as e:
-                logger.error(f"Error loading Flan-T5 model: {e}")
-                raise
+            # Initialize with None values
+            self.similarity_tokenizer = None
+            self.similarity_model = None
+            self.flan_tokenizer = None
+            self.flan_model = None
+
+            # Initialize reference counts
+            self._reference_counts['similarity'] = 0
+            self._reference_counts['flan'] = 0
 
             self._initialized = True
-            logger.info("All models initialized successfully")
+            logger.info("Model singleton initialized")
 
         except Exception as e:
             logger.error(f"Error during model initialization: {e}")
             raise
 
-    def cleanup(self):
-        """Clean up model resources"""
-        try:
-            if hasattr(self, 'similarity_model'):
-                del self.similarity_model
-            if hasattr(self, 'flan_model'):
-                del self.flan_model
-            torch.cuda.empty_cache()
-            logger.info("Model resources cleaned up successfully")
-        except Exception as e:
-            logger.error(f"Error during cleanup: {e}")
+    def get_similarity_model(self):
+        """Get sentence transformer model with reference counting"""
+        try:
+            if self.similarity_model is None:
+                logger.info("Loading sentence transformer model...")
+                SENTENCE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+                self.similarity_tokenizer = AutoTokenizer.from_pretrained(
+                    SENTENCE_MODEL,
+                    cache_dir=os.getenv('TRANSFORMERS_CACHE')
+                )
+                self.similarity_model = SentenceTransformer(
+                    SENTENCE_MODEL,
+                    cache_folder=os.getenv('TRANSFORMERS_CACHE')
+                )
+                self.similarity_model.to(self.device)
+                logger.info("Sentence transformer model loaded successfully")
+
+            self._reference_counts['similarity'] += 1
+            return self.similarity_model
+        except Exception as e:
+            logger.error(f"Error loading sentence transformer model: {e}")
+            raise
+
+    def get_flan_model(self):
+        """Get Flan-T5 model with reference counting"""
+        try:
+            if self.flan_model is None:
+                logger.info("Loading Flan-T5 model...")
+                FLAN_MODEL = "google/flan-t5-xl"
+                self.flan_tokenizer = AutoTokenizer.from_pretrained(
+                    FLAN_MODEL,
+                    cache_dir=os.getenv('TRANSFORMERS_CACHE')
+                )
+                self.flan_model = AutoModelForSeq2SeqLM.from_pretrained(
+                    FLAN_MODEL,
+                    cache_dir=os.getenv('TRANSFORMERS_CACHE'),
+                    torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+                    low_cpu_mem_usage=True
+                )
+                self.flan_model.to(self.device)
+                logger.info("Flan-T5 model loaded successfully")
+
+            self._reference_counts['flan'] += 1
+            return self.flan_model
+        except Exception as e:
+            logger.error(f"Error loading Flan-T5 model: {e}")
+            raise
+
+    def release_similarity_model(self):
+        """Release reference to similarity model"""
+        self._reference_counts['similarity'] -= 1
+        if self._reference_counts['similarity'] <= 0:
+            self._cleanup_similarity_model()
+
+    def release_flan_model(self):
+        """Release reference to Flan-T5 model"""
+        self._reference_counts['flan'] -= 1
+        if self._reference_counts['flan'] <= 0:
+            self._cleanup_flan_model()
+
+    def _cleanup_similarity_model(self):
+        """Clean up similarity model resources"""
+        if self.similarity_model is not None:
+            del self.similarity_model
+            self.similarity_model = None
+            self.similarity_tokenizer = None
+            torch.cuda.empty_cache()
+            logger.info("Similarity model resources cleaned up")
+
+    def _cleanup_flan_model(self):
+        """Clean up Flan-T5 model resources"""
+        if self.flan_model is not None:
+            del self.flan_model
+            self.flan_model = None
+            self.flan_tokenizer = None
+            torch.cuda.empty_cache()
+            logger.info("Flan-T5 model resources cleaned up")
+
+    def cleanup(self):
+        """Clean up all model resources"""
+        try:
+            self._cleanup_similarity_model()
+            self._cleanup_flan_model()
+            self._reference_counts['similarity'] = 0
+            self._reference_counts['flan'] = 0
+            logger.info("All model resources cleaned up successfully")
+        except Exception as e:
+            logger.error(f"Error during cleanup: {e}")
 
-# Create a global instance
+# Create global instance
 models = ModelSingleton()
 
 # Add cleanup function to the global instance
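The pattern introduced above is lazy loading plus reference counting: callers check a model out, use it, and check it back in; when the count drops to zero the weights are unloaded. A minimal usage sketch of that contract (hypothetical caller code, not part of the commit; assumes all_models imports cleanly and the weights download):

    from all_models import models

    model = models.get_flan_model()        # loads on first call, then bumps the ref count
    try:
        tokenizer = models.flan_tokenizer  # populated by the same get_flan_model() call
        # ... tokenize and generate here ...
    finally:
        models.release_flan_model()        # count hits 0 -> _cleanup_flan_model() frees the weights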
main.py CHANGED

@@ -1,5 +1,7 @@
 import os
 import tempfile
+import gc
+import psutil
 
 # Set up Hugging Face cache directory
 os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
@@ -420,6 +422,32 @@ def notifications():
 
     return Response(generate(), mimetype='text/event-stream')
 
+def get_memory_usage():
+    """Get current memory usage"""
+    process = psutil.Process(os.getpid())
+    return process.memory_info().rss / 1024 / 1024  # Convert to MB
+
+def cleanup_memory():
+    """Clean up memory by clearing caches and garbage collection"""
+    try:
+        # Clear PyTorch cache
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+        # Run Python garbage collection
+        gc.collect()
+
+        # Clear model caches
+        if hasattr(models, 'cleanup'):
+            models.cleanup()
+
+        # Log memory usage
+        memory_usage = get_memory_usage()
+        log_print(f"Memory usage after cleanup: {memory_usage:.2f} MB")
+
+    except Exception as e:
+        log_print(f"Error during memory cleanup: {e}", "ERROR")
+
 @app.route('/compute_marks', methods=['POST'])
 def compute_marks():
     try:
@@ -534,6 +562,9 @@ def compute_marks():
                     })
                     count += 1
 
+                    # Clean up memory after each student
+                    cleanup_memory()
+
                 except Exception as e:
                     logger.error(f"Error processing {image_path}: {str(e)}")
                     results.append({
@@ -552,6 +583,9 @@ def compute_marks():
         except Exception as e:
             logger.warning(f"Could not clean up temporary files: {e}")
 
+        # Final memory cleanup
+        cleanup_memory()
+
         return jsonify({"results": results}), 200
 
     except Exception as e:
@@ -561,6 +595,9 @@ def compute_marks():
            "message": error_msg
        })
        return jsonify({"error": error_msg}), 500
+    finally:
+        # Ensure memory is cleaned up even if there's an error
+        cleanup_memory()
 
 def marks(answer, sen_vec_answers, word_vec_answers, tf_idf_word_values, max_tfidf, correct_answers):
     try:
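get_memory_usage() reports the process's resident set size (RSS) in MB, so the effect of cleanup_memory() can be checked directly. A standalone sketch of the same measurement (illustrative only; allocation sizes are arbitrary and the numbers vary by platform and allocator):

    import gc
    import os

    import psutil

    def rss_mb() -> float:
        """RSS of the current process in MB, same formula as get_memory_usage()."""
        return psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024

    before = rss_mb()
    payload = [bytearray(10 ** 7) for _ in range(20)]  # ~200 MB of throwaway buffers
    allocated = rss_mb()
    del payload
    gc.collect()  # the same collection step cleanup_memory() performs
    print(f"before={before:.1f} MB, allocated={allocated:.1f} MB, after={rss_mb():.1f} MB")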
similarity_check/llm_based_scoring/llm.py CHANGED

@@ -23,69 +23,53 @@ def llm_score(correct_answers, answer):
             correct_answers = [correct_answers]
 
         score = []
-
+
+        # Get model instance
+        model = models.get_flan_model()
+        tokenizer = models.flan_tokenizer
+
+        # Process each correct answer
         for correct_answer in correct_answers:
             try:
-                prompt = (
-                    "You are an expert evaluator of answers. Your response must be a *single numeric score (0-10), not a range.*\n\n"
-                    "The user's answer has been converted from handwriting using OCR, so minor spelling, punctuation, or small word variations may exist. "
-                    "Focus on meaning rather than transcription errors.\n\n"
-                    "### Evaluation Criteria:\n"
-                    "- *Correctness (90% weight):* Does the answer accurately convey the meaning of the correct answer?\n"
-                    "- *Completeness (10% weight):* Does it cover all key points?\n\n"
-                    "### Handling OCR Errors:\n"
-                    "- Ignore minor spelling/punctuation mistakes that don't affect meaning.\n"
-                    "- Penalize only if word substitutions change the meaning.\n\n"
-                    "### Scoring Guidelines:\n"
-                    "- *10:* Fully correct and complete (90-100% accurate).\n"
-                    "- *From 9 to 8:* Mostly correct, minor missing details (80-90% accurate).\n"
-                    "- *From 7 to 6:* Good but missing some key points (60-80% accurate).\n"
-                    "- *From 5 to 4:* Average, with several omissions/errors (40-60% accurate).\n"
-                    "- *From 3 to 2:* Poor, major meaning errors (20-40% accurate).\n"
-                    "- *From 1 to 0:* Incorrect or irrelevant (less than 20% accurate).\n\n"
-                    "Compare the answers and assign a *single numeric score (0-10)* based on correctness and completeness.\n\n"
-                    "Correct answer:\n"
-                    f"{correct_answer}\n\n"
-                    "User's answer:\n"
-                    f"{answer}\n\n"
-                    "Final Score (numeric only, strictly between 0 and 10):")
-
-                # Tokenize input prompt
-                inputs = models.flan_tokenizer(prompt, return_tensors="pt").to(device)
-
-                # Generate response
-                with torch.no_grad():
-                    outputs = models.flan_model.generate(
+                # Prepare input
+                input_text = f"Compare these answers and give a similarity score between 0 and 1:\nCorrect: {correct_answer}\nStudent: {answer}"
+                inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
+                inputs = {k: v.to(models.device) for k, v in inputs.items()}
+
+                # Generate score
+                with torch.no_grad():  # Disable gradient calculation
+                    outputs = model.generate(
                         **inputs,
-                        max_length=2048,
-                        do_sample=True,
+                        max_length=50,
                         num_return_sequences=1,
-                        num_beams=5,
-                        temperature=0.6,
-                        top_p=0.9,
-                        early_stopping=True,
-                        pad_token_id=models.flan_tokenizer.pad_token_id,
-                        eos_token_id=models.flan_tokenizer.eos_token_id,
-                        bos_token_id=models.flan_tokenizer.bos_token_id,
+                        temperature=0.7,
+                        do_sample=True
                     )
-
-                # Decode and print response
-                response = models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True)
-                print(response)
-                score.append(response)
+
+                # Decode and extract score
+                score_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+                try:
+                    # Try to extract numeric score
+                    score_value = float(score_text.split()[-1])
+                    score.append(min(max(score_value, 0.0), 1.0))  # Clamp between 0 and 1
+                except (ValueError, IndexError):
+                    # If no numeric score found, use default
+                    score.append(0.5)
 
             except Exception as e:
-                print(f"Error processing individual answer: {str(e)}")
-                score.append("0")
+                logger.error(f"Error processing answer: {str(e)}")
+                score.append(0.5)  # Use default score on error
 
+            # Clean up tensors
+            del inputs
+            del outputs
+            torch.cuda.empty_cache()
+
         return score
 
     except Exception as e:
-        print(f"Error in llm_score: {str(e)}")
-        return ["0"]
+        logger.error(f"Error in llm_score: {str(e)}")
+        return [0.5]  # Return default score on error
+    finally:
+        # Release model reference
+        models.release_flan_model()
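The new scoring path hinges on one fragile step: the model's decoded reply must end in a number. The extraction logic, isolated as a standalone function with toy inputs (a sketch mirroring the commit's parsing, not code from the repo):

    def extract_score(score_text: str, default: float = 0.5) -> float:
        """Parse a reply the way llm_score does: last whitespace token as float, clamped to [0, 1]."""
        try:
            value = float(score_text.split()[-1])
            return min(max(value, 0.0), 1.0)
        except (ValueError, IndexError):
            return default  # empty or non-numeric tail falls back to the neutral default

    assert extract_score("similarity: 0.8") == 0.8
    assert extract_score("1.7") == 1.0            # out-of-range values are clamped
    assert extract_score("no number here") == 0.5
    assert extract_score("") == 0.5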
similarity_check/semantic_meaning_check/semantic.py CHANGED

@@ -41,22 +41,44 @@ except Exception as e:
 def question_vector_sentence(correct_answer):
     """Get sentence embedding using shared model"""
     try:
-        return models.similarity_model.encode(correct_answer, convert_to_tensor=True)
+        # Get model instance
+        model = models.get_similarity_model()
+        # Convert to tensor and move to correct device
+        embedding = model.encode(correct_answer, convert_to_tensor=True, device=models.device)
+        return embedding
     except Exception as e:
         logger.error(f"Error in question_vector_sentence: {str(e)}")
         return None
+    finally:
+        # Release model reference
+        models.release_similarity_model()
 
-def similarity_model_score(correct_answer_vector, answer):
-    """Calculate similarity score using shared model"""
+def similarity_model_score(sentence_vectors, answer):
+    """Calculate similarity score using sentence transformer"""
     try:
-        answer_embedding = models.similarity_model.encode(answer, convert_to_tensor=True)
-        cosine_score = float('-inf')
-        for i in correct_answer_vector:
-            cosine_score = max(cosine_score, util.pytorch_cos_sim(i, answer_embedding))
-        return cosine_score
+        # Get model instance
+        model = models.get_similarity_model()
+
+        # Get answer embedding
+        answer_embedding = model.encode(answer, convert_to_tensor=True, device=models.device)
+
+        # Calculate similarities
+        similarities = []
+        for vec in sentence_vectors:
+            if vec is not None:
+                similarity = util.pytorch_cos_sim(answer_embedding, vec).item()
+                similarities.append(similarity)
+
+        if not similarities:
+            return 0.0
+
+        return max(similarities)
     except Exception as e:
         logger.error(f"Error in similarity_model_score: {str(e)}")
         return 0.0
+    finally:
+        # Release model reference
+        models.release_similarity_model()
 
 def preprocess(sentence):
     """Preprocess text by tokenizing and removing stopwords"""
@@ -106,23 +128,44 @@ def compute_scm(tokens1, tokens2, model):
     return 0.5  # Return default similarity score
 
 def question_vector_word(correct_answer):
-    """Get preprocessed word tokens"""
+    """Get word embeddings using FastText"""
     try:
-        return preprocess(correct_answer)
+        # Tokenize and remove stopwords
+        stop_words = set(stopwords.words('english'))
+        words = word_tokenize(correct_answer.lower())
+        words = [w for w in words if w not in stop_words]
+
+        # Get word embeddings
+        embeddings = []
+        for word in words:
+            if word in fasttext:
+                embeddings.append(fasttext[word])
+
+        if not embeddings:
+            return np.zeros(300)  # Return zero vector if no valid words
+
+        return np.mean(embeddings, axis=0)
     except Exception as e:
         logger.error(f"Error in question_vector_word: {str(e)}")
-        return []
+        return np.zeros(300)
 
-def fasttext_similarity(correct_answer_vector, answer):
-    """Compute fasttext-based similarity between answers"""
+def fasttext_similarity(word_vectors, answer):
+    """Calculate similarity score using FastText word embeddings"""
     try:
-        preprocess_answer = preprocess(answer)
-        soft_cosine = float('-inf')
-        for i in correct_answer_vector:
-            soft_cosine = max(compute_scm(i, preprocess_answer, fasttext), soft_cosine)
-        return soft_cosine
+        # Get answer word embedding
+        answer_embedding = question_vector_word(answer)
+
+        # Calculate similarities
+        similarities = []
+        for vec in word_vectors:
+            if vec is not None:
+                similarity = np.dot(answer_embedding, vec) / (np.linalg.norm(answer_embedding) * np.linalg.norm(vec))
+                similarities.append(similarity)
+
+        if not similarities:
+            return 0.0
+
+        return max(similarities)
     except Exception as e:
         logger.error(f"Error in fasttext_similarity: {str(e)}")
         return 0.0
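With this change, fasttext_similarity reduces to cosine similarity between mean-pooled word vectors. The core computation on toy 3-dimensional vectors (illustrative only; FastText vectors are 300-dimensional, and the zero vector that question_vector_word returns on error would make the denominator zero, so a real caller would want a guard):

    import numpy as np

    def cosine(a: np.ndarray, b: np.ndarray) -> float:
        """Cosine similarity, as computed inside fasttext_similarity."""
        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

    answer = np.mean([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], axis=0)  # mean-pooled "answer" embedding
    reference = np.array([0.5, 0.5, 0.0])
    print(f"{cosine(answer, reference):.3f}")  # 1.000: same direction, so maximal similarity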