yamanavijayavardhan committed on
Commit
088b9f3
·
1 Parent(s): 94589be

printing extracted text20

Browse files
all_models.py CHANGED
@@ -4,11 +4,80 @@ import torch
4
  import os
5
  import tempfile
6
  import logging
 
7
 
8
  # Set up logging
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  class ModelSingleton:
13
  _instance = None
14
  _initialized = False
@@ -23,9 +92,39 @@ class ModelSingleton:
23
  def __init__(self):
24
  if not self._initialized:
25
  try:
26
- # Set cache directory to temporary directory
27
- cache_dir = os.getenv('TRANSFORMERS_CACHE', tempfile.gettempdir())
28
- os.environ['TRANSFORMERS_CACHE'] = cache_dir
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Get device
31
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -42,16 +141,18 @@ class ModelSingleton:
42
  self.vit_processor = None
43
 
44
  # Initialize reference counts
45
- self._reference_counts['similarity'] = 0
46
- self._reference_counts['flan'] = 0
47
- self._reference_counts['trocr'] = 0
48
- self._reference_counts['vit'] = 0
 
 
49
 
50
  self._initialized = True
51
- logger.info("Model singleton initialized")
52
 
53
  except Exception as e:
54
- logger.error(f"Error during model initialization: {e}")
55
  raise
56
 
57
  def get_similarity_model(self):
 
4
  import os
5
  import tempfile
6
  import logging
7
+ import shutil
8
 
9
  # Set up logging
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
def check_directory_permissions(path):
    """Check whether *path* exists and is genuinely writable.

    Writes and removes a throwaway probe file rather than trusting
    permission bits alone (which can mislead on read-only mounts or
    ACL-restricted directories).

    Args:
        path: Directory path to check.

    Returns:
        bool: True if the directory exists and a file could be created
        inside it; False otherwise (never raises).
    """
    try:
        if not os.path.exists(path):
            logger.warning(f"Directory does not exist: {path}")
            return False

        test_file = os.path.join(path, '.permission_test')
        try:
            with open(test_file, 'w') as f:
                f.write('test')
            logger.info(f"Directory {path} is writable")
            return True
        except Exception as e:
            logger.error(f"Directory {path} is not writable: {e}")
            return False
        finally:
            # Best-effort cleanup so the probe file never leaks, even if
            # the write above only partially succeeded.
            try:
                os.remove(test_file)
            except OSError:
                pass
    except Exception as e:
        logger.error(f"Error checking permissions for {path}: {e}")
        return False
34
+
35
def get_cache_dir():
    """Return a writable cache directory for the application.

    Tries, in order: ``~/.cache/answer_grading_app``, a subdirectory of
    the system temp directory, and ``./.cache`` under the current working
    directory. If none can be created and verified writable, falls back
    to a fresh private directory from ``tempfile.mkdtemp()``.

    Returns:
        str: Path to a directory that exists and passed a write probe.
        Never raises — the mkdtemp fallback is always attempted.
    """
    try:
        home_dir = os.path.expanduser('~')
        if not os.path.exists(home_dir):
            raise Exception(f"Home directory does not exist: {home_dir}")

        # Ordered candidates; the first one that is creatable and
        # writable wins. (The original repeated this create-and-verify
        # block three times verbatim.)
        candidates = [
            os.path.join(home_dir, '.cache', 'answer_grading_app'),
            os.path.join(tempfile.gettempdir(), 'answer_grading_app'),
            os.path.join(os.getcwd(), '.cache'),
        ]
        for candidate in candidates:
            logger.info(f"Attempting to use cache directory: {candidate}")
            os.makedirs(candidate, mode=0o755, exist_ok=True)
            if check_directory_permissions(candidate):
                logger.info(f"Using cache directory: {candidate}")
                return candidate

        raise Exception("Could not find a writable cache directory")
    except Exception as e:
        logger.error(f"Error setting up cache directory: {e}")
        # Last resort: a freshly created private temp directory is
        # always writable by the current user.
        temp_dir = tempfile.mkdtemp()
        logger.info(f"Created temporary directory as fallback: {temp_dir}")
        return temp_dir
80
+
81
  class ModelSingleton:
82
  _instance = None
83
  _initialized = False
 
92
  def __init__(self):
93
  if not self._initialized:
94
  try:
95
+ logger.info("Initializing ModelSingleton...")
96
+
97
+ # Set up cache directories
98
+ self.cache_dir = get_cache_dir()
99
+ logger.info(f"Using main cache directory: {self.cache_dir}")
100
+
101
+ # Define and create all cache directories
102
+ self.cache_dirs = {
103
+ 'transformers': os.path.join(self.cache_dir, 'transformers'),
104
+ 'huggingface': os.path.join(self.cache_dir, 'huggingface'),
105
+ 'torch': os.path.join(self.cache_dir, 'torch'),
106
+ 'cache': os.path.join(self.cache_dir, 'cache'),
107
+ 'sentence_transformers': os.path.join(self.cache_dir, 'sentence_transformers'),
108
+ 'fasttext': os.path.join(self.cache_dir, 'fasttext')
109
+ }
110
+
111
+ # Create and verify each cache directory
112
+ for name, path in self.cache_dirs.items():
113
+ try:
114
+ os.makedirs(path, mode=0o755, exist_ok=True)
115
+ if check_directory_permissions(path):
116
+ logger.info(f"Successfully created {name} cache directory: {path}")
117
+ else:
118
+ logger.error(f"Failed to verify permissions for {name} cache directory")
119
+ except Exception as e:
120
+ logger.error(f"Error creating {name} cache directory: {e}")
121
+
122
+ # Set environment variables
123
+ os.environ['TRANSFORMERS_CACHE'] = self.cache_dirs['transformers']
124
+ os.environ['HF_HOME'] = self.cache_dirs['huggingface']
125
+ os.environ['TORCH_HOME'] = self.cache_dirs['torch']
126
+ os.environ['XDG_CACHE_HOME'] = self.cache_dirs['cache']
127
+ os.environ['SENTENCE_TRANSFORMERS_HOME'] = self.cache_dirs['sentence_transformers']
128
 
129
  # Get device
130
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
 
141
  self.vit_processor = None
142
 
143
  # Initialize reference counts
144
+ self._reference_counts = {
145
+ 'similarity': 0,
146
+ 'flan': 0,
147
+ 'trocr': 0,
148
+ 'vit': 0
149
+ }
150
 
151
  self._initialized = True
152
+ logger.info("ModelSingleton initialization completed successfully")
153
 
154
  except Exception as e:
155
+ logger.error(f"Error during ModelSingleton initialization: {e}")
156
  raise
157
 
158
  def get_similarity_model(self):
similarity_check/semantic_meaning_check/semantic.py CHANGED
@@ -12,7 +12,7 @@ import sys
12
  import os
13
  import tempfile
14
  sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
15
- from all_models import models
16
  import torch
17
  import logging
18
  from utils import log_print
@@ -21,36 +21,92 @@ from utils import log_print
21
  logging.basicConfig(level=logging.INFO)
22
  logger = logging.getLogger(__name__)
23
 
24
- # Use custom directory for gensim data
25
- gensim_data_dir = os.getenv('GENSIM_DATA_DIR', tempfile.gettempdir())
26
- os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  def load_fasttext_model():
29
  """Load FastText model with proper error handling"""
30
  try:
31
- # Get the model directory from environment variable or use default
32
- model_dir = os.getenv('GENSIM_DATA_DIR', os.path.expanduser('~/.cache/gensim-data'))
33
- model_path = os.path.join(model_dir, 'fasttext-wiki-news-subwords-300', 'fasttext-wiki-news-subwords-300.gz')
 
34
 
35
- # Create directory if it doesn't exist
36
- os.makedirs(os.path.dirname(model_path), exist_ok=True)
37
-
38
- if os.path.exists(model_path):
39
- log_print("Loading fasttext model from cache...")
40
- return KeyedVectors.load_word2vec_format(model_path)
 
 
 
 
 
 
 
 
 
41
  else:
42
- log_print("Downloading fasttext model...")
43
- import gensim.downloader as api
44
- model = api.load('fasttext-wiki-news-subwords-300')
45
-
46
- # Save the model for future use
47
- os.makedirs(os.path.dirname(model_path), exist_ok=True)
48
- model.save_word2vec_format(model_path)
49
-
50
- return model
 
 
 
 
 
 
 
 
51
  except Exception as e:
52
- log_print(f"Error loading fasttext model: {str(e)}", "ERROR")
53
- # Return a dummy model that provides basic word vector functionality
54
  return DummyFasttext()
55
 
56
  class DummyFasttext:
 
12
  import os
13
  import tempfile
14
  sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
15
+ from all_models import models, get_cache_dir, check_directory_permissions
16
  import torch
17
  import logging
18
  from utils import log_print
 
21
  logging.basicConfig(level=logging.INFO)
22
  logger = logging.getLogger(__name__)
23
 
24
def verify_model_file(model_path):
    """Return True when the model file exists, is non-empty, and readable.

    Performs three checks in order — existence, non-zero size, and an
    actual read of the first bytes — because only a real read catches
    permission or corruption problems that a stat alone would miss.
    """
    try:
        if not os.path.exists(model_path):
            logger.error(f"Model file does not exist: {model_path}")
            return False

        if os.path.getsize(model_path) == 0:
            logger.error(f"Model file is empty: {model_path}")
            return False

        # Attempt a genuine read of the leading bytes.
        with open(model_path, 'rb') as handle:
            handle.read(1024)
        logger.info(f"Model file is readable: {model_path}")
        return True
    except Exception as e:
        logger.error(f"Error verifying model file {model_path}: {e}")
        return False
46
+
47
def get_fasttext_cache_dir():
    """Return a writable directory for caching the FastText model.

    Prefers a ``fasttext`` subdirectory of the shared application cache;
    falls back to a fresh temporary directory when that location cannot
    be created or fails the write probe.
    """
    fasttext_dir = os.path.join(get_cache_dir(), 'fasttext')
    logger.info(f"Setting up FastText cache directory: {fasttext_dir}")

    try:
        os.makedirs(fasttext_dir, mode=0o755, exist_ok=True)
        usable = check_directory_permissions(fasttext_dir)
    except Exception as e:
        logger.error(f"Error creating FastText cache directory: {e}")
        usable = False

    if usable:
        logger.info(f"FastText cache directory is ready: {fasttext_dir}")
        return fasttext_dir

    # Fallback: a private temp directory is always available.
    temp_dir = tempfile.mkdtemp()
    logger.info(f"Using temporary directory for FastText: {temp_dir}")
    return temp_dir
66
 
67
def load_fasttext_model():
    """Load the FastText word-vector model, downloading it if needed.

    Looks for a cached ``fasttext-wiki-news-subwords-300.gz`` in the
    application's FastText cache directory and loads it; otherwise (or if
    the cached copy is corrupt) downloads the model via gensim and makes
    a best-effort attempt to cache it. Never raises: on any failure it
    returns a ``DummyFasttext()`` stand-in so callers keep working.

    Returns:
        KeyedVectors or DummyFasttext: a usable word-vector model.
    """
    try:
        model_dir = get_fasttext_cache_dir()
        model_path = os.path.join(model_dir, 'fasttext-wiki-news-subwords-300.gz')
        logger.info(f"Attempting to load FastText model from: {model_path}")

        if os.path.exists(model_path) and verify_model_file(model_path):
            logger.info("Loading FastText model from cache...")
            try:
                model = KeyedVectors.load_word2vec_format(model_path)
                logger.info("Successfully loaded FastText model from cache")
                return model
            except Exception as e:
                logger.error(f"Error loading cached model: {str(e)}")
                # Discard the corrupted file, then fall through to a fresh
                # download. NOTE: the previous version recursed into
                # load_fasttext_model() here, which looped forever when
                # os.remove() failed (the corrupt file kept being found).
                try:
                    os.remove(model_path)
                    logger.info("Removed corrupted model file, will try downloading again")
                except Exception as rm_error:
                    logger.error(f"Could not remove corrupted model file: {rm_error}")

        return _download_fasttext_model(model_path)
    except Exception as e:
        logger.error(f"Error in load_fasttext_model: {str(e)}")
        return DummyFasttext()


def _download_fasttext_model(model_path):
    """Download the FastText model; best-effort cache it at *model_path*.

    Returns the downloaded model, or ``DummyFasttext()`` if the download
    fails. A failure to save the cache copy is logged but non-fatal.
    """
    logger.info("Downloading FastText model...")
    try:
        import gensim.downloader as api
        model = api.load('fasttext-wiki-news-subwords-300')
        logger.info("Successfully downloaded FastText model")

        try:
            model.save_word2vec_format(model_path)
            logger.info(f"Saved FastText model to: {model_path}")
        except Exception as save_error:
            logger.warning(f"Could not save model to cache: {str(save_error)}")

        return model
    except Exception as e:
        logger.error(f"Error downloading FastText model: {str(e)}")
        return DummyFasttext()
111
 
112
  class DummyFasttext: