Commit
·
2992571
1
Parent(s):
088b9f3
printing extracted text21
Browse files- all_models.py +121 -52
- main.py +91 -70
- similarity_check/semantic_meaning_check/semantic.py +52 -21
all_models.py
CHANGED
@@ -10,6 +10,26 @@ import shutil
|
|
10 |
logging.basicConfig(level=logging.INFO)
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def check_directory_permissions(path):
|
14 |
"""Check if directory exists and has correct permissions"""
|
15 |
try:
|
@@ -17,17 +37,9 @@ def check_directory_permissions(path):
|
|
17 |
logger.warning(f"Directory does not exist: {path}")
|
18 |
return False
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
with open(test_file, 'w') as f:
|
24 |
-
f.write('test')
|
25 |
-
os.remove(test_file)
|
26 |
-
logger.info(f"Directory {path} is writable")
|
27 |
-
return True
|
28 |
-
except Exception as e:
|
29 |
-
logger.error(f"Directory {path} is not writable: {e}")
|
30 |
-
return False
|
31 |
except Exception as e:
|
32 |
logger.error(f"Error checking permissions for {path}: {e}")
|
33 |
return False
|
@@ -43,40 +55,46 @@ def get_cache_dir():
|
|
43 |
cache_dir = os.path.join(home_dir, '.cache', 'answer_grading_app')
|
44 |
logger.info(f"Attempting to use cache directory: {cache_dir}")
|
45 |
|
46 |
-
# Create directory with
|
47 |
-
os.makedirs(cache_dir, mode=
|
|
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
|
|
55 |
temp_dir = os.path.join(tempfile.gettempdir(), 'answer_grading_app')
|
56 |
logger.info(f"Attempting to use temporary directory: {temp_dir}")
|
57 |
|
58 |
-
os.makedirs(temp_dir, mode=
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
64 |
current_dir = os.path.join(os.getcwd(), '.cache')
|
65 |
logger.info(f"Attempting to use current directory: {current_dir}")
|
66 |
|
67 |
-
os.makedirs(current_dir, mode=
|
68 |
-
|
69 |
-
logger.info(f"Using current directory: {current_dir}")
|
70 |
-
return current_dir
|
71 |
-
|
72 |
-
raise Exception("Could not find a writable cache directory")
|
73 |
|
|
|
|
|
74 |
except Exception as e:
|
75 |
-
logger.error(f"
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
80 |
|
81 |
class ModelSingleton:
|
82 |
_instance = None
|
@@ -108,24 +126,55 @@ class ModelSingleton:
|
|
108 |
'fasttext': os.path.join(self.cache_dir, 'fasttext')
|
109 |
}
|
110 |
|
111 |
-
# Create and verify each cache directory
|
112 |
for name, path in self.cache_dirs.items():
|
113 |
try:
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
119 |
except Exception as e:
|
120 |
logger.error(f"Error creating {name} cache directory: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
-
# Set environment variables
|
123 |
os.environ['TRANSFORMERS_CACHE'] = self.cache_dirs['transformers']
|
124 |
os.environ['HF_HOME'] = self.cache_dirs['huggingface']
|
125 |
os.environ['TORCH_HOME'] = self.cache_dirs['torch']
|
126 |
os.environ['XDG_CACHE_HOME'] = self.cache_dirs['cache']
|
127 |
os.environ['SENTENCE_TRANSFORMERS_HOME'] = self.cache_dirs['sentence_transformers']
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
# Get device
|
130 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
131 |
logger.info(f"Using device: {self.device}")
|
@@ -236,27 +285,45 @@ class ModelSingleton:
|
|
236 |
|
237 |
logger.info(f"Looking for model at: {model_path}")
|
238 |
|
|
|
239 |
if not os.path.exists(model_path):
|
240 |
raise FileNotFoundError(f"Model path does not exist: {model_path}")
|
241 |
|
242 |
-
#
|
|
|
|
|
|
|
243 |
model_files = os.listdir(model_path)
|
244 |
logger.info(f"Found model files: {', '.join(model_files)}")
|
245 |
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
|
251 |
-
# Create processor
|
252 |
self.vit_processor = ViTImageProcessor(
|
253 |
do_resize=True,
|
254 |
size=224,
|
255 |
do_normalize=True,
|
256 |
image_mean=[0.5, 0.5, 0.5],
|
257 |
-
image_std=[0.5, 0.5, 0.5]
|
|
|
258 |
)
|
259 |
|
|
|
|
|
|
|
|
|
|
|
260 |
try:
|
261 |
# First try loading as ViTModel
|
262 |
logger.info("Attempting to load as ViTModel...")
|
@@ -264,7 +331,8 @@ class ModelSingleton:
|
|
264 |
model_path,
|
265 |
local_files_only=True,
|
266 |
use_safetensors=True,
|
267 |
-
trust_remote_code=False
|
|
|
268 |
)
|
269 |
except Exception as e1:
|
270 |
logger.warning(f"Failed to load as ViTModel: {e1}")
|
@@ -275,7 +343,8 @@ class ModelSingleton:
|
|
275 |
model_path,
|
276 |
local_files_only=True,
|
277 |
use_safetensors=True,
|
278 |
-
trust_remote_code=False
|
|
|
279 |
)
|
280 |
except Exception as e2:
|
281 |
logger.error(f"Failed to load model using both methods")
|
|
|
10 |
logging.basicConfig(level=logging.INFO)
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
13 |
+
def ensure_full_permissions(path):
    """Grant full permissions to a file or directory.

    A directory is set to 0o777 (rwxrwxrwx) together with every directory
    below it, and every file below it is set to 0o666 (rw-rw-rw-). Any
    other path is treated as a single file and set to 0o666.

    Symbolic links met while walking the tree are left untouched, and a
    failure on one entry is logged without stopping the rest of the pass.

    Args:
        path: File or directory whose permissions should be opened up.

    Returns:
        True if every permission change succeeded, False if at least one
        failed. The function never raises; failures are logged instead.
    """
    # NOTE(review): world-writable modes let any local user modify these
    # files; confirm this is acceptable for the deployment environment.
    try:
        if not os.path.isdir(path):
            # Full permissions for files (rw-rw-rw-)
            os.chmod(path, 0o666)
            return True

        # Full permissions for directories (rwxrwxrwx). The parent is opened
        # up before os.walk descends so its contents can be listed.
        os.chmod(path, 0o777)

        all_ok = True
        for root, dirs, files in os.walk(path):
            for mode, names in ((0o777, dirs), (0o666, files)):
                for name in names:
                    entry = os.path.join(root, name)
                    # os.chmod follows symlinks: a link may point outside
                    # the tree (changing an unrelated file) or be dangling
                    # (raising). Real entries inside the tree are reached
                    # by the walk itself, so links can be skipped safely.
                    if os.path.islink(entry):
                        continue
                    try:
                        os.chmod(entry, mode)
                    except OSError as e:
                        # Keep going: one bad entry must not leave the
                        # remainder of the tree unprocessed.
                        logger.error(f"Error setting permissions for {entry}: {e}")
                        all_ok = False
        return all_ok
    except Exception as e:
        logger.error(f"Error setting permissions for {path}: {e}")
        return False
|
32 |
+
|
33 |
def check_directory_permissions(path):
|
34 |
"""Check if directory exists and has correct permissions"""
|
35 |
try:
|
|
|
37 |
logger.warning(f"Directory does not exist: {path}")
|
38 |
return False
|
39 |
|
40 |
+
# Set full permissions
|
41 |
+
ensure_full_permissions(path)
|
42 |
+
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
except Exception as e:
|
44 |
logger.error(f"Error checking permissions for {path}: {e}")
|
45 |
return False
|
|
|
55 |
cache_dir = os.path.join(home_dir, '.cache', 'answer_grading_app')
|
56 |
logger.info(f"Attempting to use cache directory: {cache_dir}")
|
57 |
|
58 |
+
# Create directory with full permissions
|
59 |
+
os.makedirs(cache_dir, mode=0o777, exist_ok=True)
|
60 |
+
ensure_full_permissions(cache_dir)
|
61 |
|
62 |
+
logger.info(f"Successfully created and verified cache directory: {cache_dir}")
|
63 |
+
return cache_dir
|
64 |
+
except Exception as e:
|
65 |
+
logger.warning(f"Could not use home directory cache: {e}")
|
66 |
+
|
67 |
+
# Try temp directory
|
68 |
+
try:
|
69 |
temp_dir = os.path.join(tempfile.gettempdir(), 'answer_grading_app')
|
70 |
logger.info(f"Attempting to use temporary directory: {temp_dir}")
|
71 |
|
72 |
+
os.makedirs(temp_dir, mode=0o777, exist_ok=True)
|
73 |
+
ensure_full_permissions(temp_dir)
|
74 |
+
|
75 |
+
logger.info(f"Using temporary directory: {temp_dir}")
|
76 |
+
return temp_dir
|
77 |
+
except Exception as e:
|
78 |
+
logger.warning(f"Could not use temp directory: {e}")
|
79 |
+
|
80 |
+
# Last resort: use current directory
|
81 |
+
try:
|
82 |
current_dir = os.path.join(os.getcwd(), '.cache')
|
83 |
logger.info(f"Attempting to use current directory: {current_dir}")
|
84 |
|
85 |
+
os.makedirs(current_dir, mode=0o777, exist_ok=True)
|
86 |
+
ensure_full_permissions(current_dir)
|
|
|
|
|
|
|
|
|
87 |
|
88 |
+
logger.info(f"Using current directory: {current_dir}")
|
89 |
+
return current_dir
|
90 |
except Exception as e:
|
91 |
+
logger.error(f"Could not create any cache directory: {e}")
|
92 |
+
|
93 |
+
# If all else fails, use a new temporary directory
|
94 |
+
temp_dir = tempfile.mkdtemp()
|
95 |
+
ensure_full_permissions(temp_dir)
|
96 |
+
logger.info(f"Created temporary directory as last resort: {temp_dir}")
|
97 |
+
return temp_dir
|
98 |
|
99 |
class ModelSingleton:
|
100 |
_instance = None
|
|
|
126 |
'fasttext': os.path.join(self.cache_dir, 'fasttext')
|
127 |
}
|
128 |
|
129 |
+
# Create and verify each cache directory with full permissions
|
130 |
for name, path in self.cache_dirs.items():
|
131 |
try:
|
132 |
+
# Create directory with full permissions
|
133 |
+
os.makedirs(path, mode=0o777, exist_ok=True)
|
134 |
+
ensure_full_permissions(path)
|
135 |
+
logger.info(f"Successfully created {name} cache directory: {path}")
|
136 |
+
|
137 |
+
# Create a test file to verify write permissions
|
138 |
+
test_file = os.path.join(path, '.write_test')
|
139 |
+
try:
|
140 |
+
with open(test_file, 'w') as f:
|
141 |
+
f.write('test')
|
142 |
+
os.chmod(test_file, 0o666) # Full read/write for test file
|
143 |
+
os.remove(test_file) # Clean up
|
144 |
+
logger.info(f"Verified write permissions for {name} cache directory")
|
145 |
+
except Exception as e:
|
146 |
+
logger.error(f"Failed to verify write permissions for {name} cache directory: {e}")
|
147 |
+
# Try to fix permissions
|
148 |
+
ensure_full_permissions(path)
|
149 |
+
|
150 |
except Exception as e:
|
151 |
logger.error(f"Error creating {name} cache directory: {e}")
|
152 |
+
# Try to create in temp directory as fallback
|
153 |
+
temp_path = os.path.join(tempfile.gettempdir(), 'answer_grading_app', name)
|
154 |
+
os.makedirs(temp_path, mode=0o777, exist_ok=True)
|
155 |
+
ensure_full_permissions(temp_path)
|
156 |
+
self.cache_dirs[name] = temp_path
|
157 |
+
logger.info(f"Using fallback directory for {name}: {temp_path}")
|
158 |
|
159 |
+
# Set environment variables with verified directories
|
160 |
os.environ['TRANSFORMERS_CACHE'] = self.cache_dirs['transformers']
|
161 |
os.environ['HF_HOME'] = self.cache_dirs['huggingface']
|
162 |
os.environ['TORCH_HOME'] = self.cache_dirs['torch']
|
163 |
os.environ['XDG_CACHE_HOME'] = self.cache_dirs['cache']
|
164 |
os.environ['SENTENCE_TRANSFORMERS_HOME'] = self.cache_dirs['sentence_transformers']
|
165 |
|
166 |
+
# Verify environment variables are set correctly
|
167 |
+
for env_var, path in [
|
168 |
+
('TRANSFORMERS_CACHE', 'transformers'),
|
169 |
+
('HF_HOME', 'huggingface'),
|
170 |
+
('TORCH_HOME', 'torch'),
|
171 |
+
('XDG_CACHE_HOME', 'cache'),
|
172 |
+
('SENTENCE_TRANSFORMERS_HOME', 'sentence_transformers')
|
173 |
+
]:
|
174 |
+
if os.environ.get(env_var) != self.cache_dirs[path]:
|
175 |
+
logger.warning(f"Environment variable {env_var} does not match expected path")
|
176 |
+
os.environ[env_var] = self.cache_dirs[path]
|
177 |
+
|
178 |
# Get device
|
179 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
180 |
logger.info(f"Using device: {self.device}")
|
|
|
285 |
|
286 |
logger.info(f"Looking for model at: {model_path}")
|
287 |
|
288 |
+
# Ensure model directory exists and has proper permissions
|
289 |
if not os.path.exists(model_path):
|
290 |
raise FileNotFoundError(f"Model path does not exist: {model_path}")
|
291 |
|
292 |
+
# Set full permissions for model directory
|
293 |
+
ensure_full_permissions(model_path)
|
294 |
+
|
295 |
+
# Check for model files and set their permissions
|
296 |
model_files = os.listdir(model_path)
|
297 |
logger.info(f"Found model files: {', '.join(model_files)}")
|
298 |
|
299 |
+
required_files = ['model.safetensors', 'config.json']
|
300 |
+
for file in required_files:
|
301 |
+
file_path = os.path.join(model_path, file)
|
302 |
+
if not os.path.exists(file_path):
|
303 |
+
raise FileNotFoundError(f"{file} not found in {model_path}")
|
304 |
+
# Set full permissions for model files
|
305 |
+
ensure_full_permissions(file_path)
|
306 |
+
logger.info(f"Set permissions for {file}")
|
307 |
+
|
308 |
+
# Create processor with proper cache directory
|
309 |
+
processor_cache = os.path.join(self.cache_dirs['transformers'], 'vit_processor')
|
310 |
+
os.makedirs(processor_cache, mode=0o777, exist_ok=True)
|
311 |
+
ensure_full_permissions(processor_cache)
|
312 |
|
|
|
313 |
self.vit_processor = ViTImageProcessor(
|
314 |
do_resize=True,
|
315 |
size=224,
|
316 |
do_normalize=True,
|
317 |
image_mean=[0.5, 0.5, 0.5],
|
318 |
+
image_std=[0.5, 0.5, 0.5],
|
319 |
+
cache_dir=processor_cache
|
320 |
)
|
321 |
|
322 |
+
# Try loading model with proper cache directory
|
323 |
+
model_cache = os.path.join(self.cache_dirs['transformers'], 'vit_model')
|
324 |
+
os.makedirs(model_cache, mode=0o777, exist_ok=True)
|
325 |
+
ensure_full_permissions(model_cache)
|
326 |
+
|
327 |
try:
|
328 |
# First try loading as ViTModel
|
329 |
logger.info("Attempting to load as ViTModel...")
|
|
|
331 |
model_path,
|
332 |
local_files_only=True,
|
333 |
use_safetensors=True,
|
334 |
+
trust_remote_code=False,
|
335 |
+
cache_dir=model_cache
|
336 |
)
|
337 |
except Exception as e1:
|
338 |
logger.warning(f"Failed to load as ViTModel: {e1}")
|
|
|
343 |
model_path,
|
344 |
local_files_only=True,
|
345 |
use_safetensors=True,
|
346 |
+
trust_remote_code=False,
|
347 |
+
cache_dir=model_cache
|
348 |
)
|
349 |
except Exception as e2:
|
350 |
logger.error(f"Failed to load model using both methods")
|
main.py
CHANGED
@@ -33,32 +33,57 @@ def log_print(message, level="INFO"):
|
|
33 |
"message": message
|
34 |
})
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
# Set environment variables before any other imports
|
37 |
-
cache_root = os.path.join(os.path.expanduser('~'), '.cache', 'answer_grading_app')
|
38 |
-
os.environ['TRANSFORMERS_CACHE'] = os.path.join(cache_root, 'transformers')
|
39 |
-
os.environ['HF_HOME'] = os.path.join(cache_root, 'huggingface')
|
40 |
-
os.environ['TORCH_HOME'] = os.path.join(cache_root, 'torch')
|
41 |
-
os.environ['XDG_CACHE_HOME'] = os.path.join(cache_root, 'cache')
|
42 |
-
os.environ['SENTENCE_TRANSFORMERS_HOME'] = os.path.join(cache_root, 'sentence_transformers')
|
43 |
-
os.environ['GENSIM_DATA_DIR'] = os.path.join(cache_root, 'gensim')
|
44 |
-
|
45 |
-
# Create all necessary cache directories with proper permissions
|
46 |
cache_dirs = {
|
47 |
-
'root':
|
48 |
-
'transformers': os.
|
49 |
-
'hf': os.
|
50 |
-
'torch': os.
|
51 |
-
'cache': os.
|
52 |
-
'sentence_transformers': os.
|
53 |
-
'gensim': os.
|
|
|
|
|
|
|
|
|
|
|
54 |
}
|
55 |
|
56 |
-
|
|
|
57 |
try:
|
58 |
-
os.makedirs(
|
59 |
-
log_print(f"Created
|
60 |
except Exception as e:
|
61 |
-
log_print(f"Error creating {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
# Now import the rest of the dependencies
|
64 |
import sys
|
@@ -127,17 +152,6 @@ if torch_error:
|
|
127 |
# Add the project root directory to Python path
|
128 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
129 |
|
130 |
-
# Create cache directory if it doesn't exist
|
131 |
-
BASE_DIR = '/tmp' # Use direct /tmp path for Hugging Face
|
132 |
-
log_dir = os.path.join(BASE_DIR, 'app_logs')
|
133 |
-
cache_dir = os.path.join(BASE_DIR, 'app_cache')
|
134 |
-
nltk_data_dir = os.path.join(BASE_DIR, 'nltk_data')
|
135 |
-
gensim_data_dir = os.path.join(BASE_DIR, 'gensim-data')
|
136 |
-
upload_dir = os.path.join(BASE_DIR, 'uploads')
|
137 |
-
ans_image_dir = os.path.join(BASE_DIR, 'ans_image')
|
138 |
-
images_dir = os.path.join(BASE_DIR, 'images')
|
139 |
-
log_file = os.path.join(log_dir, 'app.log') # Add log file path
|
140 |
-
|
141 |
# Global variables for model caching and initialization status
|
142 |
global_models = {}
|
143 |
initialization_complete = Event()
|
@@ -145,38 +159,40 @@ initialization_complete = Event()
|
|
145 |
# Initialize model singleton
|
146 |
models = ModelSingleton()
|
147 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
148 |
def ensure_directory(path):
|
149 |
-
"""Create directory and ensure full permissions
|
150 |
-
if os.path.exists(path):
|
151 |
-
try:
|
152 |
-
# Test write permissions
|
153 |
-
test_file = os.path.join(path, '.test')
|
154 |
-
with open(test_file, 'w') as f:
|
155 |
-
f.write('test')
|
156 |
-
os.remove(test_file)
|
157 |
-
return path
|
158 |
-
except Exception as e:
|
159 |
-
log_print(f"Warning: Directory exists but not writable: {path}", "WARNING")
|
160 |
-
try:
|
161 |
-
# Try to fix permissions
|
162 |
-
os.chmod(path, 0o777)
|
163 |
-
return path
|
164 |
-
except Exception as chmod_e:
|
165 |
-
log_print(f"Error fixing permissions for {path}: {chmod_e}", "ERROR")
|
166 |
-
raise
|
167 |
-
|
168 |
try:
|
|
|
|
|
|
|
|
|
169 |
# Create directory with full permissions
|
170 |
os.makedirs(path, mode=0o777, exist_ok=True)
|
|
|
171 |
return path
|
172 |
except Exception as e:
|
173 |
-
|
174 |
-
|
175 |
-
os.makedirs(path, mode=0o755, exist_ok=True)
|
176 |
-
return path
|
177 |
-
except Exception as nested_e:
|
178 |
-
log_print(f"Error creating directory {path}: {nested_e}", "ERROR")
|
179 |
-
raise
|
180 |
|
181 |
def get_or_load_model(model_name):
|
182 |
"""Get a model from cache or load it if not present"""
|
@@ -185,7 +201,7 @@ def get_or_load_model(model_name):
|
|
185 |
if model_name == 'fasttext':
|
186 |
from gensim.models import KeyedVectors
|
187 |
log_print(f"Loading {model_name} model...")
|
188 |
-
model_path = os.path.join(
|
189 |
model_dir = os.path.dirname(model_path)
|
190 |
|
191 |
try:
|
@@ -267,7 +283,7 @@ def initialize_resources():
|
|
267 |
"""Initialize all required resources"""
|
268 |
try:
|
269 |
# Create essential directories first
|
270 |
-
for directory in [
|
271 |
ensure_directory(directory)
|
272 |
|
273 |
# Initialize NLTK
|
@@ -278,7 +294,7 @@ def initialize_resources():
|
|
278 |
except LookupError:
|
279 |
try:
|
280 |
log_print(f"Downloading NLTK data: {data}")
|
281 |
-
nltk.download(data, download_dir=
|
282 |
except Exception as e:
|
283 |
log_print(f"Error downloading NLTK data {data}: {e}", "WARNING")
|
284 |
continue
|
@@ -300,16 +316,20 @@ def initialize_resources():
|
|
300 |
initialization_complete.set()
|
301 |
|
302 |
# Create essential directories
|
303 |
-
essential_dirs = [
|
304 |
for directory in essential_dirs:
|
305 |
ensure_directory(directory)
|
306 |
-
|
307 |
-
# Set environment variables
|
308 |
-
os.environ['HF_HOME'] =
|
309 |
-
os.environ['GENSIM_DATA_DIR'] =
|
310 |
|
311 |
# Add the custom directory to NLTK's search path
|
312 |
-
nltk.data.path.insert(0,
|
|
|
|
|
|
|
|
|
313 |
|
314 |
# Start initialization in background
|
315 |
initialization_thread = Thread(target=initialize_resources, daemon=True)
|
@@ -807,9 +827,10 @@ def marks(answer, sen_vec_answers, word_vec_answers, tf_idf_word_values, max_tfi
|
|
807 |
def check_logs():
|
808 |
try:
|
809 |
# Ensure log directory exists
|
810 |
-
ensure_directory(
|
811 |
|
812 |
# If log file doesn't exist, create it
|
|
|
813 |
if not os.path.exists(log_file):
|
814 |
with open(log_file, 'w') as f:
|
815 |
f.write("Log file created.\n")
|
@@ -869,10 +890,10 @@ def cleanup_temp_files():
|
|
869 |
shutil.rmtree(temp_processing_dir, ignore_errors=True)
|
870 |
|
871 |
# Clean up the images directory
|
872 |
-
if os.path.exists(
|
873 |
-
for file in os.listdir(
|
874 |
try:
|
875 |
-
file_path = os.path.join(
|
876 |
if os.path.isfile(file_path):
|
877 |
os.unlink(file_path)
|
878 |
except Exception as e:
|
|
|
33 |
"message": message
|
34 |
})
|
35 |
|
36 |
+
def get_user_cache_dir():
    """Get a user-accessible cache directory.

    Candidates are tried in order: ``~/.cache/answer_grading_app`` and then
    ``answer_grading_app`` under the system temporary directory. A candidate
    is accepted only if it can be created (or already exists) and is
    writable by the current process. If neither works, a fresh private
    temporary directory is created as a last resort.

    Returns:
        Path of a directory that exists and is writable.
    """
    candidates = (
        # Try user's home directory first
        os.path.join(os.path.expanduser('~'), '.cache', 'answer_grading_app'),
        # Fallback to temp directory
        os.path.join(tempfile.gettempdir(), 'answer_grading_app'),
    )
    for candidate in candidates:
        try:
            # exist_ok=True already covers the "directory is there" case,
            # so no separate existence check is needed.
            os.makedirs(candidate, mode=0o755, exist_ok=True)
        except OSError as e:
            log_print(f"Error creating user cache directory: {e}", "WARNING")
            continue
        # A directory that exists but cannot be written to is useless as a
        # cache root; move on instead of failing later at first write.
        if os.access(candidate, os.W_OK | os.X_OK):
            return candidate
        log_print(f"Cache directory is not writable: {candidate}", "WARNING")

    # This runs at import time, so never let it raise for lack of a
    # predictable location: mkdtemp creates a new directory for this process.
    return tempfile.mkdtemp(prefix='answer_grading_app_')
|
50 |
+
|
51 |
+
# Set up base directories
|
52 |
+
BASE_DIR = get_user_cache_dir()
|
53 |
+
log_print(f"Using base directory: {BASE_DIR}")
|
54 |
+
|
55 |
# Set environment variables before any other imports
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
cache_dirs = {
|
57 |
+
'root': BASE_DIR,
|
58 |
+
'transformers': os.path.join(BASE_DIR, 'transformers'),
|
59 |
+
'hf': os.path.join(BASE_DIR, 'huggingface'),
|
60 |
+
'torch': os.path.join(BASE_DIR, 'torch'),
|
61 |
+
'cache': os.path.join(BASE_DIR, 'cache'),
|
62 |
+
'sentence_transformers': os.path.join(BASE_DIR, 'sentence_transformers'),
|
63 |
+
'gensim': os.path.join(BASE_DIR, 'gensim'),
|
64 |
+
'nltk': os.path.join(BASE_DIR, 'nltk_data'),
|
65 |
+
'logs': os.path.join(BASE_DIR, 'logs'),
|
66 |
+
'uploads': os.path.join(BASE_DIR, 'uploads'),
|
67 |
+
'images': os.path.join(BASE_DIR, 'images'),
|
68 |
+
'ans_image': os.path.join(BASE_DIR, 'ans_image')
|
69 |
}
|
70 |
|
71 |
+
# Create all necessary directories with proper permissions
|
72 |
+
for name, path in cache_dirs.items():
|
73 |
try:
|
74 |
+
os.makedirs(path, mode=0o755, exist_ok=True)
|
75 |
+
log_print(f"Created directory: {path}")
|
76 |
except Exception as e:
|
77 |
+
log_print(f"Error creating directory {name}: {e}", "ERROR")
|
78 |
+
|
79 |
+
# Set environment variables
|
80 |
+
os.environ['TRANSFORMERS_CACHE'] = cache_dirs['transformers']
|
81 |
+
os.environ['HF_HOME'] = cache_dirs['hf']
|
82 |
+
os.environ['TORCH_HOME'] = cache_dirs['torch']
|
83 |
+
os.environ['XDG_CACHE_HOME'] = cache_dirs['cache']
|
84 |
+
os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dirs['sentence_transformers']
|
85 |
+
os.environ['GENSIM_DATA_DIR'] = cache_dirs['gensim']
|
86 |
+
os.environ['NLTK_DATA'] = cache_dirs['nltk']
|
87 |
|
88 |
# Now import the rest of the dependencies
|
89 |
import sys
|
|
|
152 |
# Add the project root directory to Python path
|
153 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
# Global variables for model caching and initialization status
|
156 |
global_models = {}
|
157 |
initialization_complete = Event()
|
|
|
159 |
# Initialize model singleton
|
160 |
models = ModelSingleton()
|
161 |
|
162 |
+
def ensure_full_permissions(path):
    """Grant full permissions to a file or directory.

    A directory is set to 0o777 (rwxrwxrwx) together with every directory
    below it, and every file below it is set to 0o666 (rw-rw-rw-). Any
    other path is treated as a single file and set to 0o666.

    Symbolic links met while walking the tree are left untouched, and a
    failure on one entry is logged without stopping the rest of the pass.

    Args:
        path: File or directory whose permissions should be opened up.

    Returns:
        True if every permission change succeeded, False if at least one
        failed. The function never raises; failures are logged instead.
    """
    # NOTE(review): world-writable modes let any local user modify these
    # files; confirm this is acceptable for the deployment environment.
    try:
        if not os.path.isdir(path):
            # Full permissions for files (rw-rw-rw-)
            os.chmod(path, 0o666)
            return True

        # Full permissions for directories (rwxrwxrwx). The parent is opened
        # up before os.walk descends so its contents can be listed.
        os.chmod(path, 0o777)

        all_ok = True
        for root, dirs, files in os.walk(path):
            for mode, names in ((0o777, dirs), (0o666, files)):
                for name in names:
                    entry = os.path.join(root, name)
                    # os.chmod follows symlinks: a link may point outside
                    # the tree (changing an unrelated file) or be dangling
                    # (raising). Real entries inside the tree are reached
                    # by the walk itself, so links can be skipped safely.
                    if os.path.islink(entry):
                        continue
                    try:
                        os.chmod(entry, mode)
                    except OSError as e:
                        # Keep going: one bad entry must not leave the
                        # remainder of the tree unprocessed.
                        log_print(f"Error setting permissions for {entry}: {e}", "ERROR")
                        all_ok = False
        return all_ok
    except Exception as e:
        log_print(f"Error setting permissions for {path}: {e}", "ERROR")
        return False
|
181 |
+
|
182 |
def ensure_directory(path):
    """Create directory and ensure full permissions.

    Creates ``path`` (and any missing parents) if needed, then opens up its
    permissions with ``ensure_full_permissions``.

    Args:
        path: Directory to create or re-permission.

    Returns:
        ``path`` unchanged, once the directory exists.

    Raises:
        Exception: Whatever ``os.makedirs`` raised (for example when
            ``path`` exists but is not a directory); it is logged first.
    """
    try:
        # exist_ok=True makes this a no-op for an existing directory, while
        # an existing non-directory raises instead of being silently
        # accepted as if it were a usable directory.
        os.makedirs(path, mode=0o777, exist_ok=True)
        # The mode passed to makedirs is filtered by the process umask, so
        # the permissions are applied explicitly afterwards.
        ensure_full_permissions(path)
        return path
    except Exception as e:
        log_print(f"Error creating directory {path}: {e}", "ERROR")
        raise
|
|
|
|
|
|
|
|
|
|
|
196 |
|
197 |
def get_or_load_model(model_name):
|
198 |
"""Get a model from cache or load it if not present"""
|
|
|
201 |
if model_name == 'fasttext':
|
202 |
from gensim.models import KeyedVectors
|
203 |
log_print(f"Loading {model_name} model...")
|
204 |
+
model_path = os.path.join(cache_dirs['gensim'], 'fasttext-wiki-news-subwords-300', 'fasttext-wiki-news-subwords-300.gz')
|
205 |
model_dir = os.path.dirname(model_path)
|
206 |
|
207 |
try:
|
|
|
283 |
"""Initialize all required resources"""
|
284 |
try:
|
285 |
# Create essential directories first
|
286 |
+
for directory in [cache_dirs['nltk']]:
|
287 |
ensure_directory(directory)
|
288 |
|
289 |
# Initialize NLTK
|
|
|
294 |
except LookupError:
|
295 |
try:
|
296 |
log_print(f"Downloading NLTK data: {data}")
|
297 |
+
nltk.download(data, download_dir=cache_dirs['nltk'], quiet=True)
|
298 |
except Exception as e:
|
299 |
log_print(f"Error downloading NLTK data {data}: {e}", "WARNING")
|
300 |
continue
|
|
|
316 |
initialization_complete.set()
|
317 |
|
318 |
# Create essential directories
|
319 |
+
essential_dirs = [cache_dirs['root'], cache_dirs['uploads'], cache_dirs['images']]
|
320 |
for directory in essential_dirs:
|
321 |
ensure_directory(directory)
|
322 |
+
|
323 |
+
# Set environment variables with full permissions
|
324 |
+
os.environ['HF_HOME'] = cache_dirs['hf']
|
325 |
+
os.environ['GENSIM_DATA_DIR'] = cache_dirs['gensim']
|
326 |
|
327 |
# Add the custom directory to NLTK's search path
|
328 |
+
nltk.data.path.insert(0, cache_dirs['nltk'])
|
329 |
+
|
330 |
+
# Ensure all cache directories have full permissions
|
331 |
+
for cache_dir in cache_dirs.values():
|
332 |
+
ensure_full_permissions(cache_dir)
|
333 |
|
334 |
# Start initialization in background
|
335 |
initialization_thread = Thread(target=initialize_resources, daemon=True)
|
|
|
827 |
def check_logs():
|
828 |
try:
|
829 |
# Ensure log directory exists
|
830 |
+
ensure_directory(cache_dirs['logs'])
|
831 |
|
832 |
# If log file doesn't exist, create it
|
833 |
+
log_file = os.path.join(cache_dirs['logs'], 'app.log')
|
834 |
if not os.path.exists(log_file):
|
835 |
with open(log_file, 'w') as f:
|
836 |
f.write("Log file created.\n")
|
|
|
890 |
shutil.rmtree(temp_processing_dir, ignore_errors=True)
|
891 |
|
892 |
# Clean up the images directory
|
893 |
+
if os.path.exists(cache_dirs['images']):
|
894 |
+
for file in os.listdir(cache_dirs['images']):
|
895 |
try:
|
896 |
+
file_path = os.path.join(cache_dirs['images'], file)
|
897 |
if os.path.isfile(file_path):
|
898 |
os.unlink(file_path)
|
899 |
except Exception as e:
|
similarity_check/semantic_meaning_check/semantic.py
CHANGED
@@ -64,15 +64,43 @@ def get_fasttext_cache_dir():
|
|
64 |
logger.info(f"Using temporary directory for FastText: {temp_dir}")
|
65 |
return temp_dir
|
66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
def load_fasttext_model():
|
68 |
"""Load FastText model with proper error handling"""
|
69 |
try:
|
70 |
-
# Get model directory
|
71 |
-
model_dir =
|
|
|
|
|
|
|
|
|
|
|
72 |
model_path = os.path.join(model_dir, 'fasttext-wiki-news-subwords-300.gz')
|
73 |
logger.info(f"Attempting to load FastText model from: {model_path}")
|
74 |
|
75 |
-
if os.path.exists(model_path)
|
|
|
|
|
|
|
76 |
logger.info("Loading FastText model from cache...")
|
77 |
try:
|
78 |
model = KeyedVectors.load_word2vec_format(model_path)
|
@@ -86,25 +114,28 @@ def load_fasttext_model():
|
|
86 |
logger.info("Removed corrupted model file, will try downloading again")
|
87 |
except Exception as rm_error:
|
88 |
logger.error(f"Could not remove corrupted model file: {rm_error}")
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
try:
|
93 |
-
|
94 |
-
model
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
except Exception as e:
|
106 |
-
logger.error(f"Error downloading FastText model: {str(e)}")
|
107 |
-
return DummyFasttext()
|
108 |
except Exception as e:
|
109 |
logger.error(f"Error in load_fasttext_model: {str(e)}")
|
110 |
return DummyFasttext()
|
|
|
64 |
logger.info(f"Using temporary directory for FastText: {temp_dir}")
|
65 |
return temp_dir
|
66 |
|
67 |
+
def ensure_full_permissions(path):
    """Grant full permissions to a file or directory.

    A directory is set to 0o777 (rwxrwxrwx) together with every directory
    below it, and every file below it is set to 0o666 (rw-rw-rw-). Any
    other path is treated as a single file and set to 0o666.

    Symbolic links met while walking the tree are left untouched, and a
    failure on one entry is logged without stopping the rest of the pass.

    Args:
        path: File or directory whose permissions should be opened up.

    Returns:
        True if every permission change succeeded, False if at least one
        failed. The function never raises; failures are logged instead.
    """
    # NOTE(review): world-writable modes let any local user modify these
    # files; confirm this is acceptable for the deployment environment.
    try:
        if not os.path.isdir(path):
            # Full permissions for files (rw-rw-rw-)
            os.chmod(path, 0o666)
            return True

        # Full permissions for directories (rwxrwxrwx). The parent is opened
        # up before os.walk descends so its contents can be listed.
        os.chmod(path, 0o777)

        all_ok = True
        for root, dirs, files in os.walk(path):
            for mode, names in ((0o777, dirs), (0o666, files)):
                for name in names:
                    entry = os.path.join(root, name)
                    # os.chmod follows symlinks: a link may point outside
                    # the tree (changing an unrelated file) or be dangling
                    # (raising). Real entries inside the tree are reached
                    # by the walk itself, so links can be skipped safely.
                    if os.path.islink(entry):
                        continue
                    try:
                        os.chmod(entry, mode)
                    except OSError as e:
                        # Keep going: one bad entry must not leave the
                        # remainder of the tree unprocessed.
                        logger.error(f"Error setting permissions for {entry}: {e}")
                        all_ok = False
        return all_ok
    except Exception as e:
        logger.error(f"Error setting permissions for {path}: {e}")
        return False
|
86 |
+
|
87 |
def load_fasttext_model():
|
88 |
"""Load FastText model with proper error handling"""
|
89 |
try:
|
90 |
+
# Get model directory from environment variable
|
91 |
+
model_dir = os.getenv('GENSIM_DATA_DIR')
|
92 |
+
if not model_dir:
|
93 |
+
model_dir = os.path.join(os.path.expanduser('~'), '.cache', 'answer_grading_app', 'gensim')
|
94 |
+
os.makedirs(model_dir, mode=0o777, exist_ok=True)
|
95 |
+
ensure_full_permissions(model_dir)
|
96 |
+
|
97 |
model_path = os.path.join(model_dir, 'fasttext-wiki-news-subwords-300.gz')
|
98 |
logger.info(f"Attempting to load FastText model from: {model_path}")
|
99 |
|
100 |
+
if os.path.exists(model_path):
|
101 |
+
# Set full permissions for existing model file
|
102 |
+
ensure_full_permissions(model_path)
|
103 |
+
|
104 |
logger.info("Loading FastText model from cache...")
|
105 |
try:
|
106 |
model = KeyedVectors.load_word2vec_format(model_path)
|
|
|
114 |
logger.info("Removed corrupted model file, will try downloading again")
|
115 |
except Exception as rm_error:
|
116 |
logger.error(f"Could not remove corrupted model file: {rm_error}")
|
117 |
+
|
118 |
+
# Download model if not found or corrupted
|
119 |
+
logger.info("Downloading FastText model...")
|
120 |
+
try:
|
121 |
+
import gensim.downloader as api
|
122 |
+
model = api.load('fasttext-wiki-news-subwords-300')
|
123 |
+
logger.info("Successfully downloaded FastText model")
|
124 |
+
|
125 |
+
# Save the model with full permissions
|
126 |
try:
|
127 |
+
os.makedirs(os.path.dirname(model_path), mode=0o777, exist_ok=True)
|
128 |
+
model.save_word2vec_format(model_path)
|
129 |
+
ensure_full_permissions(model_path)
|
130 |
+
logger.info(f"Saved FastText model to: {model_path}")
|
131 |
+
except Exception as save_error:
|
132 |
+
logger.warning(f"Could not save model to cache: {str(save_error)}")
|
133 |
+
|
134 |
+
return model
|
135 |
+
except Exception as e:
|
136 |
+
logger.error(f"Error downloading FastText model: {str(e)}")
|
137 |
+
return DummyFasttext()
|
138 |
+
|
|
|
|
|
|
|
139 |
except Exception as e:
|
140 |
logger.error(f"Error in load_fasttext_model: {str(e)}")
|
141 |
return DummyFasttext()
|