yamanavijayavardhan committed on
Commit
2992571
·
1 Parent(s): 088b9f3

printing extracted text21

Browse files
all_models.py CHANGED
@@ -10,6 +10,26 @@ import shutil
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def check_directory_permissions(path):
14
  """Check if directory exists and has correct permissions"""
15
  try:
@@ -17,17 +37,9 @@ def check_directory_permissions(path):
17
  logger.warning(f"Directory does not exist: {path}")
18
  return False
19
 
20
- # Try to create a test file
21
- test_file = os.path.join(path, '.permission_test')
22
- try:
23
- with open(test_file, 'w') as f:
24
- f.write('test')
25
- os.remove(test_file)
26
- logger.info(f"Directory {path} is writable")
27
- return True
28
- except Exception as e:
29
- logger.error(f"Directory {path} is not writable: {e}")
30
- return False
31
  except Exception as e:
32
  logger.error(f"Error checking permissions for {path}: {e}")
33
  return False
@@ -43,40 +55,46 @@ def get_cache_dir():
43
  cache_dir = os.path.join(home_dir, '.cache', 'answer_grading_app')
44
  logger.info(f"Attempting to use cache directory: {cache_dir}")
45
 
46
- # Create directory with proper permissions
47
- os.makedirs(cache_dir, mode=0o755, exist_ok=True)
 
48
 
49
- # Verify permissions
50
- if check_directory_permissions(cache_dir):
51
- logger.info(f"Successfully created and verified cache directory: {cache_dir}")
52
- return cache_dir
53
-
54
- # If home directory fails, try temp directory
 
55
  temp_dir = os.path.join(tempfile.gettempdir(), 'answer_grading_app')
56
  logger.info(f"Attempting to use temporary directory: {temp_dir}")
57
 
58
- os.makedirs(temp_dir, mode=0o755, exist_ok=True)
59
- if check_directory_permissions(temp_dir):
60
- logger.info(f"Using temporary directory: {temp_dir}")
61
- return temp_dir
62
-
63
- # Last resort: use current directory
 
 
 
 
64
  current_dir = os.path.join(os.getcwd(), '.cache')
65
  logger.info(f"Attempting to use current directory: {current_dir}")
66
 
67
- os.makedirs(current_dir, mode=0o755, exist_ok=True)
68
- if check_directory_permissions(current_dir):
69
- logger.info(f"Using current directory: {current_dir}")
70
- return current_dir
71
-
72
- raise Exception("Could not find a writable cache directory")
73
 
 
 
74
  except Exception as e:
75
- logger.error(f"Error setting up cache directory: {e}")
76
- # Create a new temporary directory as last resort
77
- temp_dir = tempfile.mkdtemp()
78
- logger.info(f"Created temporary directory as fallback: {temp_dir}")
79
- return temp_dir
 
 
80
 
81
  class ModelSingleton:
82
  _instance = None
@@ -108,24 +126,55 @@ class ModelSingleton:
108
  'fasttext': os.path.join(self.cache_dir, 'fasttext')
109
  }
110
 
111
- # Create and verify each cache directory
112
  for name, path in self.cache_dirs.items():
113
  try:
114
- os.makedirs(path, mode=0o755, exist_ok=True)
115
- if check_directory_permissions(path):
116
- logger.info(f"Successfully created {name} cache directory: {path}")
117
- else:
118
- logger.error(f"Failed to verify permissions for {name} cache directory")
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  except Exception as e:
120
  logger.error(f"Error creating {name} cache directory: {e}")
 
 
 
 
 
 
121
 
122
- # Set environment variables
123
  os.environ['TRANSFORMERS_CACHE'] = self.cache_dirs['transformers']
124
  os.environ['HF_HOME'] = self.cache_dirs['huggingface']
125
  os.environ['TORCH_HOME'] = self.cache_dirs['torch']
126
  os.environ['XDG_CACHE_HOME'] = self.cache_dirs['cache']
127
  os.environ['SENTENCE_TRANSFORMERS_HOME'] = self.cache_dirs['sentence_transformers']
128
 
 
 
 
 
 
 
 
 
 
 
 
 
129
  # Get device
130
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
131
  logger.info(f"Using device: {self.device}")
@@ -236,27 +285,45 @@ class ModelSingleton:
236
 
237
  logger.info(f"Looking for model at: {model_path}")
238
 
 
239
  if not os.path.exists(model_path):
240
  raise FileNotFoundError(f"Model path does not exist: {model_path}")
241
 
242
- # Check for model files
 
 
 
243
  model_files = os.listdir(model_path)
244
  logger.info(f"Found model files: {', '.join(model_files)}")
245
 
246
- if 'model.safetensors' not in model_files:
247
- raise FileNotFoundError(f"model.safetensors not found in {model_path}")
248
- if 'config.json' not in model_files:
249
- raise FileNotFoundError(f"config.json not found in {model_path}")
 
 
 
 
 
 
 
 
 
250
 
251
- # Create processor
252
  self.vit_processor = ViTImageProcessor(
253
  do_resize=True,
254
  size=224,
255
  do_normalize=True,
256
  image_mean=[0.5, 0.5, 0.5],
257
- image_std=[0.5, 0.5, 0.5]
 
258
  )
259
 
 
 
 
 
 
260
  try:
261
  # First try loading as ViTModel
262
  logger.info("Attempting to load as ViTModel...")
@@ -264,7 +331,8 @@ class ModelSingleton:
264
  model_path,
265
  local_files_only=True,
266
  use_safetensors=True,
267
- trust_remote_code=False
 
268
  )
269
  except Exception as e1:
270
  logger.warning(f"Failed to load as ViTModel: {e1}")
@@ -275,7 +343,8 @@ class ModelSingleton:
275
  model_path,
276
  local_files_only=True,
277
  use_safetensors=True,
278
- trust_remote_code=False
 
279
  )
280
  except Exception as e2:
281
  logger.error(f"Failed to load model using both methods")
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
+ def ensure_full_permissions(path):
14
+ """Grant full permissions to a file or directory"""
15
+ try:
16
+ if os.path.isdir(path):
17
+ # Full permissions for directories (rwxrwxrwx)
18
+ os.chmod(path, 0o777)
19
+ # Apply to all contents recursively
20
+ for root, dirs, files in os.walk(path):
21
+ for d in dirs:
22
+ os.chmod(os.path.join(root, d), 0o777)
23
+ for f in files:
24
+ os.chmod(os.path.join(root, f), 0o666)
25
+ else:
26
+ # Full permissions for files (rw-rw-rw-)
27
+ os.chmod(path, 0o666)
28
+ return True
29
+ except Exception as e:
30
+ logger.error(f"Error setting permissions for {path}: {e}")
31
+ return False
32
+
33
  def check_directory_permissions(path):
34
  """Check if directory exists and has correct permissions"""
35
  try:
 
37
  logger.warning(f"Directory does not exist: {path}")
38
  return False
39
 
40
+ # Set full permissions
41
+ ensure_full_permissions(path)
42
+ return True
 
 
 
 
 
 
 
 
43
  except Exception as e:
44
  logger.error(f"Error checking permissions for {path}: {e}")
45
  return False
 
55
  cache_dir = os.path.join(home_dir, '.cache', 'answer_grading_app')
56
  logger.info(f"Attempting to use cache directory: {cache_dir}")
57
 
58
+ # Create directory with full permissions
59
+ os.makedirs(cache_dir, mode=0o777, exist_ok=True)
60
+ ensure_full_permissions(cache_dir)
61
 
62
+ logger.info(f"Successfully created and verified cache directory: {cache_dir}")
63
+ return cache_dir
64
+ except Exception as e:
65
+ logger.warning(f"Could not use home directory cache: {e}")
66
+
67
+ # Try temp directory
68
+ try:
69
  temp_dir = os.path.join(tempfile.gettempdir(), 'answer_grading_app')
70
  logger.info(f"Attempting to use temporary directory: {temp_dir}")
71
 
72
+ os.makedirs(temp_dir, mode=0o777, exist_ok=True)
73
+ ensure_full_permissions(temp_dir)
74
+
75
+ logger.info(f"Using temporary directory: {temp_dir}")
76
+ return temp_dir
77
+ except Exception as e:
78
+ logger.warning(f"Could not use temp directory: {e}")
79
+
80
+ # Last resort: use current directory
81
+ try:
82
  current_dir = os.path.join(os.getcwd(), '.cache')
83
  logger.info(f"Attempting to use current directory: {current_dir}")
84
 
85
+ os.makedirs(current_dir, mode=0o777, exist_ok=True)
86
+ ensure_full_permissions(current_dir)
 
 
 
 
87
 
88
+ logger.info(f"Using current directory: {current_dir}")
89
+ return current_dir
90
  except Exception as e:
91
+ logger.error(f"Could not create any cache directory: {e}")
92
+
93
+ # If all else fails, use a new temporary directory
94
+ temp_dir = tempfile.mkdtemp()
95
+ ensure_full_permissions(temp_dir)
96
+ logger.info(f"Created temporary directory as last resort: {temp_dir}")
97
+ return temp_dir
98
 
99
  class ModelSingleton:
100
  _instance = None
 
126
  'fasttext': os.path.join(self.cache_dir, 'fasttext')
127
  }
128
 
129
+ # Create and verify each cache directory with full permissions
130
  for name, path in self.cache_dirs.items():
131
  try:
132
+ # Create directory with full permissions
133
+ os.makedirs(path, mode=0o777, exist_ok=True)
134
+ ensure_full_permissions(path)
135
+ logger.info(f"Successfully created {name} cache directory: {path}")
136
+
137
+ # Create a test file to verify write permissions
138
+ test_file = os.path.join(path, '.write_test')
139
+ try:
140
+ with open(test_file, 'w') as f:
141
+ f.write('test')
142
+ os.chmod(test_file, 0o666) # Full read/write for test file
143
+ os.remove(test_file) # Clean up
144
+ logger.info(f"Verified write permissions for {name} cache directory")
145
+ except Exception as e:
146
+ logger.error(f"Failed to verify write permissions for {name} cache directory: {e}")
147
+ # Try to fix permissions
148
+ ensure_full_permissions(path)
149
+
150
  except Exception as e:
151
  logger.error(f"Error creating {name} cache directory: {e}")
152
+ # Try to create in temp directory as fallback
153
+ temp_path = os.path.join(tempfile.gettempdir(), 'answer_grading_app', name)
154
+ os.makedirs(temp_path, mode=0o777, exist_ok=True)
155
+ ensure_full_permissions(temp_path)
156
+ self.cache_dirs[name] = temp_path
157
+ logger.info(f"Using fallback directory for {name}: {temp_path}")
158
 
159
+ # Set environment variables with verified directories
160
  os.environ['TRANSFORMERS_CACHE'] = self.cache_dirs['transformers']
161
  os.environ['HF_HOME'] = self.cache_dirs['huggingface']
162
  os.environ['TORCH_HOME'] = self.cache_dirs['torch']
163
  os.environ['XDG_CACHE_HOME'] = self.cache_dirs['cache']
164
  os.environ['SENTENCE_TRANSFORMERS_HOME'] = self.cache_dirs['sentence_transformers']
165
 
166
+ # Verify environment variables are set correctly
167
+ for env_var, path in [
168
+ ('TRANSFORMERS_CACHE', 'transformers'),
169
+ ('HF_HOME', 'huggingface'),
170
+ ('TORCH_HOME', 'torch'),
171
+ ('XDG_CACHE_HOME', 'cache'),
172
+ ('SENTENCE_TRANSFORMERS_HOME', 'sentence_transformers')
173
+ ]:
174
+ if os.environ.get(env_var) != self.cache_dirs[path]:
175
+ logger.warning(f"Environment variable {env_var} does not match expected path")
176
+ os.environ[env_var] = self.cache_dirs[path]
177
+
178
  # Get device
179
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
180
  logger.info(f"Using device: {self.device}")
 
285
 
286
  logger.info(f"Looking for model at: {model_path}")
287
 
288
+ # Ensure model directory exists and has proper permissions
289
  if not os.path.exists(model_path):
290
  raise FileNotFoundError(f"Model path does not exist: {model_path}")
291
 
292
+ # Set full permissions for model directory
293
+ ensure_full_permissions(model_path)
294
+
295
+ # Check for model files and set their permissions
296
  model_files = os.listdir(model_path)
297
  logger.info(f"Found model files: {', '.join(model_files)}")
298
 
299
+ required_files = ['model.safetensors', 'config.json']
300
+ for file in required_files:
301
+ file_path = os.path.join(model_path, file)
302
+ if not os.path.exists(file_path):
303
+ raise FileNotFoundError(f"{file} not found in {model_path}")
304
+ # Set full permissions for model files
305
+ ensure_full_permissions(file_path)
306
+ logger.info(f"Set permissions for {file}")
307
+
308
+ # Create processor with proper cache directory
309
+ processor_cache = os.path.join(self.cache_dirs['transformers'], 'vit_processor')
310
+ os.makedirs(processor_cache, mode=0o777, exist_ok=True)
311
+ ensure_full_permissions(processor_cache)
312
 
 
313
  self.vit_processor = ViTImageProcessor(
314
  do_resize=True,
315
  size=224,
316
  do_normalize=True,
317
  image_mean=[0.5, 0.5, 0.5],
318
+ image_std=[0.5, 0.5, 0.5],
319
+ cache_dir=processor_cache
320
  )
321
 
322
+ # Try loading model with proper cache directory
323
+ model_cache = os.path.join(self.cache_dirs['transformers'], 'vit_model')
324
+ os.makedirs(model_cache, mode=0o777, exist_ok=True)
325
+ ensure_full_permissions(model_cache)
326
+
327
  try:
328
  # First try loading as ViTModel
329
  logger.info("Attempting to load as ViTModel...")
 
331
  model_path,
332
  local_files_only=True,
333
  use_safetensors=True,
334
+ trust_remote_code=False,
335
+ cache_dir=model_cache
336
  )
337
  except Exception as e1:
338
  logger.warning(f"Failed to load as ViTModel: {e1}")
 
343
  model_path,
344
  local_files_only=True,
345
  use_safetensors=True,
346
+ trust_remote_code=False,
347
+ cache_dir=model_cache
348
  )
349
  except Exception as e2:
350
  logger.error(f"Failed to load model using both methods")
main.py CHANGED
@@ -33,32 +33,57 @@ def log_print(message, level="INFO"):
33
  "message": message
34
  })
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # Set environment variables before any other imports
37
- cache_root = os.path.join(os.path.expanduser('~'), '.cache', 'answer_grading_app')
38
- os.environ['TRANSFORMERS_CACHE'] = os.path.join(cache_root, 'transformers')
39
- os.environ['HF_HOME'] = os.path.join(cache_root, 'huggingface')
40
- os.environ['TORCH_HOME'] = os.path.join(cache_root, 'torch')
41
- os.environ['XDG_CACHE_HOME'] = os.path.join(cache_root, 'cache')
42
- os.environ['SENTENCE_TRANSFORMERS_HOME'] = os.path.join(cache_root, 'sentence_transformers')
43
- os.environ['GENSIM_DATA_DIR'] = os.path.join(cache_root, 'gensim')
44
-
45
- # Create all necessary cache directories with proper permissions
46
  cache_dirs = {
47
- 'root': cache_root,
48
- 'transformers': os.environ['TRANSFORMERS_CACHE'],
49
- 'hf': os.environ['HF_HOME'],
50
- 'torch': os.environ['TORCH_HOME'],
51
- 'cache': os.environ['XDG_CACHE_HOME'],
52
- 'sentence_transformers': os.environ['SENTENCE_TRANSFORMERS_HOME'],
53
- 'gensim': os.environ['GENSIM_DATA_DIR']
 
 
 
 
 
54
  }
55
 
56
- for cache_name, cache_dir in cache_dirs.items():
 
57
  try:
58
- os.makedirs(cache_dir, mode=0o755, exist_ok=True)
59
- log_print(f"Created cache directory for {cache_name}: {cache_dir}")
60
  except Exception as e:
61
- log_print(f"Error creating {cache_name} cache directory: {e}", "ERROR")
 
 
 
 
 
 
 
 
 
62
 
63
  # Now import the rest of the dependencies
64
  import sys
@@ -127,17 +152,6 @@ if torch_error:
127
  # Add the project root directory to Python path
128
  sys.path.append(os.path.dirname(os.path.abspath(__file__)))
129
 
130
- # Create cache directory if it doesn't exist
131
- BASE_DIR = '/tmp' # Use direct /tmp path for Hugging Face
132
- log_dir = os.path.join(BASE_DIR, 'app_logs')
133
- cache_dir = os.path.join(BASE_DIR, 'app_cache')
134
- nltk_data_dir = os.path.join(BASE_DIR, 'nltk_data')
135
- gensim_data_dir = os.path.join(BASE_DIR, 'gensim-data')
136
- upload_dir = os.path.join(BASE_DIR, 'uploads')
137
- ans_image_dir = os.path.join(BASE_DIR, 'ans_image')
138
- images_dir = os.path.join(BASE_DIR, 'images')
139
- log_file = os.path.join(log_dir, 'app.log') # Add log file path
140
-
141
  # Global variables for model caching and initialization status
142
  global_models = {}
143
  initialization_complete = Event()
@@ -145,38 +159,40 @@ initialization_complete = Event()
145
  # Initialize model singleton
146
  models = ModelSingleton()
147
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
  def ensure_directory(path):
149
- """Create directory and ensure full permissions with better error handling"""
150
- if os.path.exists(path):
151
- try:
152
- # Test write permissions
153
- test_file = os.path.join(path, '.test')
154
- with open(test_file, 'w') as f:
155
- f.write('test')
156
- os.remove(test_file)
157
- return path
158
- except Exception as e:
159
- log_print(f"Warning: Directory exists but not writable: {path}", "WARNING")
160
- try:
161
- # Try to fix permissions
162
- os.chmod(path, 0o777)
163
- return path
164
- except Exception as chmod_e:
165
- log_print(f"Error fixing permissions for {path}: {chmod_e}", "ERROR")
166
- raise
167
-
168
  try:
 
 
 
 
169
  # Create directory with full permissions
170
  os.makedirs(path, mode=0o777, exist_ok=True)
 
171
  return path
172
  except Exception as e:
173
- try:
174
- # Try with more restricted permissions
175
- os.makedirs(path, mode=0o755, exist_ok=True)
176
- return path
177
- except Exception as nested_e:
178
- log_print(f"Error creating directory {path}: {nested_e}", "ERROR")
179
- raise
180
 
181
  def get_or_load_model(model_name):
182
  """Get a model from cache or load it if not present"""
@@ -185,7 +201,7 @@ def get_or_load_model(model_name):
185
  if model_name == 'fasttext':
186
  from gensim.models import KeyedVectors
187
  log_print(f"Loading {model_name} model...")
188
- model_path = os.path.join(gensim_data_dir, 'fasttext-wiki-news-subwords-300', 'fasttext-wiki-news-subwords-300.gz')
189
  model_dir = os.path.dirname(model_path)
190
 
191
  try:
@@ -267,7 +283,7 @@ def initialize_resources():
267
  """Initialize all required resources"""
268
  try:
269
  # Create essential directories first
270
- for directory in [nltk_data_dir, gensim_data_dir]:
271
  ensure_directory(directory)
272
 
273
  # Initialize NLTK
@@ -278,7 +294,7 @@ def initialize_resources():
278
  except LookupError:
279
  try:
280
  log_print(f"Downloading NLTK data: {data}")
281
- nltk.download(data, download_dir=nltk_data_dir, quiet=True)
282
  except Exception as e:
283
  log_print(f"Error downloading NLTK data {data}: {e}", "WARNING")
284
  continue
@@ -300,16 +316,20 @@ def initialize_resources():
300
  initialization_complete.set()
301
 
302
  # Create essential directories
303
- essential_dirs = [cache_dir, upload_dir, images_dir]
304
  for directory in essential_dirs:
305
  ensure_directory(directory)
306
-
307
- # Set environment variables
308
- os.environ['HF_HOME'] = cache_dir
309
- os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
310
 
311
  # Add the custom directory to NLTK's search path
312
- nltk.data.path.insert(0, nltk_data_dir)
 
 
 
 
313
 
314
  # Start initialization in background
315
  initialization_thread = Thread(target=initialize_resources, daemon=True)
@@ -807,9 +827,10 @@ def marks(answer, sen_vec_answers, word_vec_answers, tf_idf_word_values, max_tfi
807
  def check_logs():
808
  try:
809
  # Ensure log directory exists
810
- ensure_directory(log_dir)
811
 
812
  # If log file doesn't exist, create it
 
813
  if not os.path.exists(log_file):
814
  with open(log_file, 'w') as f:
815
  f.write("Log file created.\n")
@@ -869,10 +890,10 @@ def cleanup_temp_files():
869
  shutil.rmtree(temp_processing_dir, ignore_errors=True)
870
 
871
  # Clean up the images directory
872
- if os.path.exists(images_dir):
873
- for file in os.listdir(images_dir):
874
  try:
875
- file_path = os.path.join(images_dir, file)
876
  if os.path.isfile(file_path):
877
  os.unlink(file_path)
878
  except Exception as e:
 
33
  "message": message
34
  })
35
 
36
+ def get_user_cache_dir():
37
+ """Get a user-accessible cache directory"""
38
+ try:
39
+ # Try user's home directory first
40
+ user_cache = os.path.join(os.path.expanduser('~'), '.cache', 'answer_grading_app')
41
+ if not os.path.exists(user_cache):
42
+ os.makedirs(user_cache, mode=0o755, exist_ok=True)
43
+ return user_cache
44
+ except Exception as e:
45
+ log_print(f"Error creating user cache directory: {e}", "WARNING")
46
+ # Fallback to temp directory
47
+ temp_dir = os.path.join(tempfile.gettempdir(), 'answer_grading_app')
48
+ os.makedirs(temp_dir, mode=0o755, exist_ok=True)
49
+ return temp_dir
50
+
51
+ # Set up base directories
52
+ BASE_DIR = get_user_cache_dir()
53
+ log_print(f"Using base directory: {BASE_DIR}")
54
+
55
  # Set environment variables before any other imports
 
 
 
 
 
 
 
 
 
56
  cache_dirs = {
57
+ 'root': BASE_DIR,
58
+ 'transformers': os.path.join(BASE_DIR, 'transformers'),
59
+ 'hf': os.path.join(BASE_DIR, 'huggingface'),
60
+ 'torch': os.path.join(BASE_DIR, 'torch'),
61
+ 'cache': os.path.join(BASE_DIR, 'cache'),
62
+ 'sentence_transformers': os.path.join(BASE_DIR, 'sentence_transformers'),
63
+ 'gensim': os.path.join(BASE_DIR, 'gensim'),
64
+ 'nltk': os.path.join(BASE_DIR, 'nltk_data'),
65
+ 'logs': os.path.join(BASE_DIR, 'logs'),
66
+ 'uploads': os.path.join(BASE_DIR, 'uploads'),
67
+ 'images': os.path.join(BASE_DIR, 'images'),
68
+ 'ans_image': os.path.join(BASE_DIR, 'ans_image')
69
  }
70
 
71
+ # Create all necessary directories with proper permissions
72
+ for name, path in cache_dirs.items():
73
  try:
74
+ os.makedirs(path, mode=0o755, exist_ok=True)
75
+ log_print(f"Created directory: {path}")
76
  except Exception as e:
77
+ log_print(f"Error creating directory {name}: {e}", "ERROR")
78
+
79
+ # Set environment variables
80
+ os.environ['TRANSFORMERS_CACHE'] = cache_dirs['transformers']
81
+ os.environ['HF_HOME'] = cache_dirs['hf']
82
+ os.environ['TORCH_HOME'] = cache_dirs['torch']
83
+ os.environ['XDG_CACHE_HOME'] = cache_dirs['cache']
84
+ os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dirs['sentence_transformers']
85
+ os.environ['GENSIM_DATA_DIR'] = cache_dirs['gensim']
86
+ os.environ['NLTK_DATA'] = cache_dirs['nltk']
87
 
88
  # Now import the rest of the dependencies
89
  import sys
 
152
  # Add the project root directory to Python path
153
  sys.path.append(os.path.dirname(os.path.abspath(__file__)))
154
 
 
 
 
 
 
 
 
 
 
 
 
155
  # Global variables for model caching and initialization status
156
  global_models = {}
157
  initialization_complete = Event()
 
159
  # Initialize model singleton
160
  models = ModelSingleton()
161
 
162
+ def ensure_full_permissions(path):
163
+ """Grant full permissions to a file or directory"""
164
+ try:
165
+ if os.path.isdir(path):
166
+ # Full permissions for directories (rwxrwxrwx)
167
+ os.chmod(path, 0o777)
168
+ # Apply to all contents recursively
169
+ for root, dirs, files in os.walk(path):
170
+ for d in dirs:
171
+ os.chmod(os.path.join(root, d), 0o777)
172
+ for f in files:
173
+ os.chmod(os.path.join(root, f), 0o666)
174
+ else:
175
+ # Full permissions for files (rw-rw-rw-)
176
+ os.chmod(path, 0o666)
177
+ return True
178
+ except Exception as e:
179
+ log_print(f"Error setting permissions for {path}: {e}", "ERROR")
180
+ return False
181
+
182
  def ensure_directory(path):
183
+ """Create directory and ensure full permissions"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  try:
185
+ if os.path.exists(path):
186
+ ensure_full_permissions(path)
187
+ return path
188
+
189
  # Create directory with full permissions
190
  os.makedirs(path, mode=0o777, exist_ok=True)
191
+ ensure_full_permissions(path)
192
  return path
193
  except Exception as e:
194
+ log_print(f"Error creating directory {path}: {e}", "ERROR")
195
+ raise
 
 
 
 
 
196
 
197
  def get_or_load_model(model_name):
198
  """Get a model from cache or load it if not present"""
 
201
  if model_name == 'fasttext':
202
  from gensim.models import KeyedVectors
203
  log_print(f"Loading {model_name} model...")
204
+ model_path = os.path.join(cache_dirs['gensim'], 'fasttext-wiki-news-subwords-300', 'fasttext-wiki-news-subwords-300.gz')
205
  model_dir = os.path.dirname(model_path)
206
 
207
  try:
 
283
  """Initialize all required resources"""
284
  try:
285
  # Create essential directories first
286
+ for directory in [cache_dirs['nltk']]:
287
  ensure_directory(directory)
288
 
289
  # Initialize NLTK
 
294
  except LookupError:
295
  try:
296
  log_print(f"Downloading NLTK data: {data}")
297
+ nltk.download(data, download_dir=cache_dirs['nltk'], quiet=True)
298
  except Exception as e:
299
  log_print(f"Error downloading NLTK data {data}: {e}", "WARNING")
300
  continue
 
316
  initialization_complete.set()
317
 
318
  # Create essential directories
319
+ essential_dirs = [cache_dirs['root'], cache_dirs['uploads'], cache_dirs['images']]
320
  for directory in essential_dirs:
321
  ensure_directory(directory)
322
+
323
+ # Set environment variables with full permissions
324
+ os.environ['HF_HOME'] = cache_dirs['hf']
325
+ os.environ['GENSIM_DATA_DIR'] = cache_dirs['gensim']
326
 
327
  # Add the custom directory to NLTK's search path
328
+ nltk.data.path.insert(0, cache_dirs['nltk'])
329
+
330
+ # Ensure all cache directories have full permissions
331
+ for cache_dir in cache_dirs.values():
332
+ ensure_full_permissions(cache_dir)
333
 
334
  # Start initialization in background
335
  initialization_thread = Thread(target=initialize_resources, daemon=True)
 
827
  def check_logs():
828
  try:
829
  # Ensure log directory exists
830
+ ensure_directory(cache_dirs['logs'])
831
 
832
  # If log file doesn't exist, create it
833
+ log_file = os.path.join(cache_dirs['logs'], 'app.log')
834
  if not os.path.exists(log_file):
835
  with open(log_file, 'w') as f:
836
  f.write("Log file created.\n")
 
890
  shutil.rmtree(temp_processing_dir, ignore_errors=True)
891
 
892
  # Clean up the images directory
893
+ if os.path.exists(cache_dirs['images']):
894
+ for file in os.listdir(cache_dirs['images']):
895
  try:
896
+ file_path = os.path.join(cache_dirs['images'], file)
897
  if os.path.isfile(file_path):
898
  os.unlink(file_path)
899
  except Exception as e:
similarity_check/semantic_meaning_check/semantic.py CHANGED
@@ -64,15 +64,43 @@ def get_fasttext_cache_dir():
64
  logger.info(f"Using temporary directory for FastText: {temp_dir}")
65
  return temp_dir
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def load_fasttext_model():
68
  """Load FastText model with proper error handling"""
69
  try:
70
- # Get model directory
71
- model_dir = get_fasttext_cache_dir()
 
 
 
 
 
72
  model_path = os.path.join(model_dir, 'fasttext-wiki-news-subwords-300.gz')
73
  logger.info(f"Attempting to load FastText model from: {model_path}")
74
 
75
- if os.path.exists(model_path) and verify_model_file(model_path):
 
 
 
76
  logger.info("Loading FastText model from cache...")
77
  try:
78
  model = KeyedVectors.load_word2vec_format(model_path)
@@ -86,25 +114,28 @@ def load_fasttext_model():
86
  logger.info("Removed corrupted model file, will try downloading again")
87
  except Exception as rm_error:
88
  logger.error(f"Could not remove corrupted model file: {rm_error}")
89
- return load_fasttext_model()
90
- else:
91
- logger.info("Downloading FastText model...")
 
 
 
 
 
 
92
  try:
93
- import gensim.downloader as api
94
- model = api.load('fasttext-wiki-news-subwords-300')
95
- logger.info("Successfully downloaded FastText model")
96
-
97
- # Save the model for future use
98
- try:
99
- model.save_word2vec_format(model_path)
100
- logger.info(f"Saved FastText model to: {model_path}")
101
- except Exception as save_error:
102
- logger.warning(f"Could not save model to cache: {str(save_error)}")
103
-
104
- return model
105
- except Exception as e:
106
- logger.error(f"Error downloading FastText model: {str(e)}")
107
- return DummyFasttext()
108
  except Exception as e:
109
  logger.error(f"Error in load_fasttext_model: {str(e)}")
110
  return DummyFasttext()
 
64
  logger.info(f"Using temporary directory for FastText: {temp_dir}")
65
  return temp_dir
66
 
67
+ def ensure_full_permissions(path):
68
+ """Grant full permissions to a file or directory"""
69
+ try:
70
+ if os.path.isdir(path):
71
+ # Full permissions for directories (rwxrwxrwx)
72
+ os.chmod(path, 0o777)
73
+ # Apply to all contents recursively
74
+ for root, dirs, files in os.walk(path):
75
+ for d in dirs:
76
+ os.chmod(os.path.join(root, d), 0o777)
77
+ for f in files:
78
+ os.chmod(os.path.join(root, f), 0o666)
79
+ else:
80
+ # Full permissions for files (rw-rw-rw-)
81
+ os.chmod(path, 0o666)
82
+ return True
83
+ except Exception as e:
84
+ logger.error(f"Error setting permissions for {path}: {e}")
85
+ return False
86
+
87
  def load_fasttext_model():
88
  """Load FastText model with proper error handling"""
89
  try:
90
+ # Get model directory from environment variable
91
+ model_dir = os.getenv('GENSIM_DATA_DIR')
92
+ if not model_dir:
93
+ model_dir = os.path.join(os.path.expanduser('~'), '.cache', 'answer_grading_app', 'gensim')
94
+ os.makedirs(model_dir, mode=0o777, exist_ok=True)
95
+ ensure_full_permissions(model_dir)
96
+
97
  model_path = os.path.join(model_dir, 'fasttext-wiki-news-subwords-300.gz')
98
  logger.info(f"Attempting to load FastText model from: {model_path}")
99
 
100
+ if os.path.exists(model_path):
101
+ # Set full permissions for existing model file
102
+ ensure_full_permissions(model_path)
103
+
104
  logger.info("Loading FastText model from cache...")
105
  try:
106
  model = KeyedVectors.load_word2vec_format(model_path)
 
114
  logger.info("Removed corrupted model file, will try downloading again")
115
  except Exception as rm_error:
116
  logger.error(f"Could not remove corrupted model file: {rm_error}")
117
+
118
+ # Download model if not found or corrupted
119
+ logger.info("Downloading FastText model...")
120
+ try:
121
+ import gensim.downloader as api
122
+ model = api.load('fasttext-wiki-news-subwords-300')
123
+ logger.info("Successfully downloaded FastText model")
124
+
125
+ # Save the model with full permissions
126
  try:
127
+ os.makedirs(os.path.dirname(model_path), mode=0o777, exist_ok=True)
128
+ model.save_word2vec_format(model_path)
129
+ ensure_full_permissions(model_path)
130
+ logger.info(f"Saved FastText model to: {model_path}")
131
+ except Exception as save_error:
132
+ logger.warning(f"Could not save model to cache: {str(save_error)}")
133
+
134
+ return model
135
+ except Exception as e:
136
+ logger.error(f"Error downloading FastText model: {str(e)}")
137
+ return DummyFasttext()
138
+
 
 
 
139
  except Exception as e:
140
  logger.error(f"Error in load_fasttext_model: {str(e)}")
141
  return DummyFasttext()