yamanavijayavardhan committed on
Commit
94589be
·
1 Parent(s): 6139662

printing extracted text19

Browse files
all_models.py CHANGED
@@ -124,27 +124,30 @@ class ModelSingleton:
124
  """Get ViT model with reference counting"""
125
  try:
126
  if self.vit_model is None:
127
- from transformers import ViTImageProcessor, ViTModel, AutoModelForImageClassification, AutoConfig
128
  logger.info("Loading ViT model...")
129
 
130
- # Get model path - fix to use project root
131
  import os
132
- project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
133
  model_path = os.path.join(project_root, 'models', 'vit-base-beans')
134
 
135
  logger.info(f"Looking for model at: {model_path}")
136
 
137
  if not os.path.exists(model_path):
138
  raise FileNotFoundError(f"Model path does not exist: {model_path}")
139
-
140
  # Check for model files
141
  model_files = os.listdir(model_path)
142
- logger.info(f"Found model files: {model_files}")
143
 
144
- if 'model.safetensors' not in model_files or 'config.json' not in model_files:
145
- raise FileNotFoundError(f"Required model files missing in {model_path}")
 
 
146
 
147
- # Create processor with explicit settings
148
  self.vit_processor = ViTImageProcessor(
149
  do_resize=True,
150
  size=224,
@@ -154,34 +157,35 @@ class ModelSingleton:
154
  )
155
 
156
  try:
157
- # Load model with explicit settings
158
- logger.info("Loading ViT model with safetensors...")
159
  self.vit_model = ViTModel.from_pretrained(
160
  model_path,
161
  local_files_only=True,
162
  use_safetensors=True,
163
  trust_remote_code=False
164
  )
165
- self.vit_model.to(self.device)
166
- self.vit_model.eval()
167
- logger.info("ViT model loaded successfully")
168
- except Exception as model_error:
169
- logger.error(f"Error loading model: {model_error}")
170
- # Try alternative loading method
171
  try:
172
- logger.info("Attempting alternative model loading...")
 
173
  self.vit_model = AutoModelForImageClassification.from_pretrained(
174
  model_path,
175
  local_files_only=True,
176
  use_safetensors=True,
177
  trust_remote_code=False
178
  )
179
- self.vit_model.to(self.device)
180
- self.vit_model.eval()
181
- logger.info("ViT model loaded successfully using alternative method")
182
- except Exception as alt_error:
183
- logger.error(f"Alternative loading also failed: {alt_error}")
184
- raise
 
 
 
 
185
 
186
  self._reference_counts['vit'] += 1
187
  return self.vit_model, self.vit_processor
 
124
  """Get ViT model with reference counting"""
125
  try:
126
  if self.vit_model is None:
127
+ from transformers import ViTImageProcessor, ViTModel, AutoModelForImageClassification
128
  logger.info("Loading ViT model...")
129
 
130
+ # Get model path - fix to use absolute path
131
  import os
132
+ current_file_path = os.path.abspath(__file__)
133
+ project_root = os.path.dirname(os.path.dirname(current_file_path))
134
  model_path = os.path.join(project_root, 'models', 'vit-base-beans')
135
 
136
  logger.info(f"Looking for model at: {model_path}")
137
 
138
  if not os.path.exists(model_path):
139
  raise FileNotFoundError(f"Model path does not exist: {model_path}")
140
+
141
  # Check for model files
142
  model_files = os.listdir(model_path)
143
+ logger.info(f"Found model files: {', '.join(model_files)}")
144
 
145
+ if 'model.safetensors' not in model_files:
146
+ raise FileNotFoundError(f"model.safetensors not found in {model_path}")
147
+ if 'config.json' not in model_files:
148
+ raise FileNotFoundError(f"config.json not found in {model_path}")
149
 
150
+ # Create processor
151
  self.vit_processor = ViTImageProcessor(
152
  do_resize=True,
153
  size=224,
 
157
  )
158
 
159
  try:
160
+ # First try loading as ViTModel
161
+ logger.info("Attempting to load as ViTModel...")
162
  self.vit_model = ViTModel.from_pretrained(
163
  model_path,
164
  local_files_only=True,
165
  use_safetensors=True,
166
  trust_remote_code=False
167
  )
168
+ except Exception as e1:
169
+ logger.warning(f"Failed to load as ViTModel: {e1}")
 
 
 
 
170
  try:
171
+ # Try loading as AutoModelForImageClassification
172
+ logger.info("Attempting to load as AutoModelForImageClassification...")
173
  self.vit_model = AutoModelForImageClassification.from_pretrained(
174
  model_path,
175
  local_files_only=True,
176
  use_safetensors=True,
177
  trust_remote_code=False
178
  )
179
+ except Exception as e2:
180
+ logger.error(f"Failed to load model using both methods")
181
+ logger.error(f"ViTModel error: {e1}")
182
+ logger.error(f"AutoModel error: {e2}")
183
+ raise Exception("Failed to load model using any available method")
184
+
185
+ # Move model to device and set to eval mode
186
+ self.vit_model.to(self.device)
187
+ self.vit_model.eval()
188
+ logger.info(f"ViT model loaded successfully and moved to {self.device}")
189
 
190
  self._reference_counts['vit'] += 1
191
  return self.vit_model, self.vit_processor
main.py CHANGED
@@ -34,24 +34,28 @@ def log_print(message, level="INFO"):
34
  })
35
 
36
  # Set environment variables before any other imports
37
- os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
38
- os.environ['HF_HOME'] = os.path.join(tempfile.gettempdir(), 'huggingface')
39
- os.environ['TORCH_HOME'] = os.path.join(tempfile.gettempdir(), 'torch')
40
- os.environ['XDG_CACHE_HOME'] = os.path.join(tempfile.gettempdir(), 'cache')
41
- os.environ['SENTENCE_TRANSFORMERS_HOME'] = os.path.join(tempfile.gettempdir(), 'sentence_transformers')
42
-
43
- # Create all necessary cache directories
 
 
44
  cache_dirs = {
 
45
  'transformers': os.environ['TRANSFORMERS_CACHE'],
46
  'hf': os.environ['HF_HOME'],
47
  'torch': os.environ['TORCH_HOME'],
48
  'cache': os.environ['XDG_CACHE_HOME'],
49
- 'sentence_transformers': os.environ['SENTENCE_TRANSFORMERS_HOME']
 
50
  }
51
 
52
  for cache_name, cache_dir in cache_dirs.items():
53
  try:
54
- os.makedirs(cache_dir, exist_ok=True)
55
  log_print(f"Created cache directory for {cache_name}: {cache_dir}")
56
  except Exception as e:
57
  log_print(f"Error creating {cache_name} cache directory: {e}", "ERROR")
 
34
  })
35
 
36
  # Set environment variables before any other imports
37
# Every cache lives below one per-user root folder so nothing is written to
# the system temp directory.
cache_root = os.path.join(os.path.expanduser('~'), '.cache', 'answer_grading_app')

# Point each library's cache variable at its own sub-folder of the root.
os.environ.update({
    'TRANSFORMERS_CACHE': os.path.join(cache_root, 'transformers'),
    'HF_HOME': os.path.join(cache_root, 'huggingface'),
    'TORCH_HOME': os.path.join(cache_root, 'torch'),
    'XDG_CACHE_HOME': os.path.join(cache_root, 'cache'),
    'SENTENCE_TRANSFORMERS_HOME': os.path.join(cache_root, 'sentence_transformers'),
    'GENSIM_DATA_DIR': os.path.join(cache_root, 'gensim'),
})

# Label -> directory for everything that has to exist on disk; the root is
# listed first so it is created before its children.
cache_dirs = {'root': cache_root}
cache_dirs.update(
    transformers=os.environ['TRANSFORMERS_CACHE'],
    hf=os.environ['HF_HOME'],
    torch=os.environ['TORCH_HOME'],
    cache=os.environ['XDG_CACHE_HOME'],
    sentence_transformers=os.environ['SENTENCE_TRANSFORMERS_HOME'],
    gensim=os.environ['GENSIM_DATA_DIR'],
)

# Best effort: a directory that cannot be created is reported through
# log_print and the remaining ones are still attempted.
for cache_name, cache_dir in cache_dirs.items():
    try:
        os.makedirs(cache_dir, mode=0o755, exist_ok=True)
        log_print(f"Created cache directory for {cache_name}: {cache_dir}")
    except Exception as e:
        log_print(f"Error creating {cache_name} cache directory: {e}", "ERROR")
similarity_check/semantic_meaning_check/semantic.py CHANGED
@@ -15,6 +15,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
15
  from all_models import models
16
  import torch
17
  import logging
 
18
 
19
  # Set up logging
20
  logging.basicConfig(level=logging.INFO)
@@ -24,24 +25,51 @@ logger = logging.getLogger(__name__)
24
  gensim_data_dir = os.getenv('GENSIM_DATA_DIR', tempfile.gettempdir())
25
  os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
26
 
27
- # Load fasttext with error handling
28
- try:
29
- model_path = os.path.join(gensim_data_dir, 'fasttext-wiki-news-subwords-300', 'fasttext-wiki-news-subwords-300.gz')
30
- if os.path.exists(model_path):
31
- print("Loading fasttext model from cache...")
32
- fasttext = KeyedVectors.load_word2vec_format(model_path)
33
- else:
34
- print("Loading fasttext model...")
35
- fasttext = load('fasttext-wiki-news-subwords-300')
36
- except Exception as e:
37
- print(f"Error loading fasttext model: {e}")
38
- # Provide a fallback for similarity calculations
39
- class DummyFasttext:
40
- def __getitem__(self, word):
41
- return np.zeros(300) # Return zero vector of size 300
42
- def __contains__(self, word):
43
- return True
44
- fasttext = DummyFasttext()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def question_vector_sentence(correct_answer):
47
  """Get sentence embedding using shared model"""
@@ -143,8 +171,8 @@ def question_vector_word(correct_answer):
143
  # Get word embeddings
144
  embeddings = []
145
  for word in words:
146
- if word in fasttext:
147
- embeddings.append(fasttext[word])
148
 
149
  if not embeddings:
150
  return np.zeros(300) # Return zero vector if no valid words
 
15
  from all_models import models
16
  import torch
17
  import logging
18
+ from utils import log_print
19
 
20
  # Set up logging
21
  logging.basicConfig(level=logging.INFO)
 
25
  gensim_data_dir = os.getenv('GENSIM_DATA_DIR', tempfile.gettempdir())
26
  os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
27
 
28
def load_fasttext_model():
    """Load the FastText word vectors, falling back to a dummy model on failure.

    The vectors are looked up at
    ``<GENSIM_DATA_DIR>/fasttext-wiki-news-subwords-300/fasttext-wiki-news-subwords-300.gz``
    (``~/.cache/gensim-data`` is the default when the variable is unset).
    When that file is missing the model is fetched with ``gensim.downloader``
    and, unless the download already produced the file, written to that path
    for later runs.

    Returns:
        The loaded word vectors, or a ``DummyFasttext`` instance when loading
        or downloading raised an exception.
    """
    model_name = 'fasttext-wiki-news-subwords-300'
    try:
        # Get the model directory from environment variable or use default
        model_dir = os.getenv('GENSIM_DATA_DIR', os.path.expanduser('~/.cache/gensim-data'))
        model_path = os.path.join(model_dir, model_name, f'{model_name}.gz')

        if os.path.exists(model_path):
            log_print("Loading fasttext model from cache...")
            return KeyedVectors.load_word2vec_format(model_path)

        # The model folder is deliberately NOT created before downloading:
        # gensim.downloader treats an existing model folder as "already
        # downloaded", skips the download and then fails to import it.
        log_print("Downloading fasttext model...")
        import gensim.downloader as api
        model = api.load(model_name)
    except Exception as e:
        log_print(f"Error loading fasttext model: {str(e)}", "ERROR")
        # Return a dummy model that provides basic word vector functionality
        return DummyFasttext()

    # Caching is best effort: a failure here must not throw away a model that
    # was loaded successfully.
    try:
        # The downloader may already have stored the archive at model_path;
        # only write a copy when it is still missing.
        if not os.path.exists(model_path):
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            model.save_word2vec_format(model_path)
    except Exception as e:
        log_print(f"Could not cache fasttext model: {str(e)}", "WARNING")

    return model
55
+
56
class DummyFasttext:
    """Fallback class when FastText model fails to load.

    Stands in for the real word vectors so callers keep working: every word is
    reported as known and maps to an all-zero vector of ``vector_size``
    dimensions, and similarity queries return no neighbours.
    """

    def __init__(self, vector_size=300):
        """Create the fallback model and log that it is in use.

        Args:
            vector_size: Length of the zero vectors handed out (default 300).
        """
        self.vector_size = vector_size
        log_print("Using dummy FastText model due to loading error", "WARNING")

    def get_vector(self, word):
        """Return a zero vector of length ``vector_size`` for any word."""
        return np.zeros(self.vector_size)

    def __getitem__(self, word):
        """Support ``model[word]``; same result as ``get_vector``."""
        return self.get_vector(word)

    def __contains__(self, word):
        """Support ``word in model``; every word counts as present.

        Without this method Python would evaluate ``in`` by calling
        ``__getitem__`` with 0, 1, 2, ... which never terminates normally
        because ``__getitem__`` never raises ``IndexError``.
        """
        return True

    def most_similar(self, word, topn=10):
        """Return an empty neighbour list; the dummy holds no real vectors."""
        return []
70
+
71
+ # Load the model once at module level
72
+ fasttext_model = load_fasttext_model()
73
 
74
  def question_vector_sentence(correct_answer):
75
  """Get sentence embedding using shared model"""
 
171
  # Get word embeddings
172
  embeddings = []
173
  for word in words:
174
+ if word in fasttext_model:
175
+ embeddings.append(fasttext_model[word])
176
 
177
  if not embeddings:
178
  return np.zeros(300) # Return zero vector if no valid words