yamanavijayavardhan committed
Commit 2d9524f · 1 Parent(s): 77e1eaf

printing extracted text12

Files changed (1)
  1. main.py +128 -19
main.py CHANGED
@@ -3,21 +3,59 @@ import tempfile
 import gc
 import psutil
 import time
+import logging
+import queue
+
+# Set up logging first
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    handlers=[logging.StreamHandler()]
+)
+logger = logging.getLogger(__name__)
+
+# Create notification queue for real-time updates
+notification_queue = queue.Queue()
+
+def log_print(message, level="INFO"):
+    """Unified logging function"""
+    if level == "ERROR":
+        logger.error(message)
+    elif level == "WARNING":
+        logger.warning(message)
+    else:
+        logger.info(message)
+    # Also put the message in notification queue for frontend
+    notification_queue.put({
+        "type": level.lower(),
+        "message": message
+    })
 
 # Set environment variables before any other imports
 os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
 os.environ['HF_HOME'] = os.path.join(tempfile.gettempdir(), 'huggingface')
 os.environ['TORCH_HOME'] = os.path.join(tempfile.gettempdir(), 'torch')
 os.environ['XDG_CACHE_HOME'] = os.path.join(tempfile.gettempdir(), 'cache')
-
-# Create cache directories
-for cache_dir in [os.environ['TRANSFORMERS_CACHE'], os.environ['HF_HOME'],
-                  os.environ['TORCH_HOME'], os.environ['XDG_CACHE_HOME']]:
-    os.makedirs(cache_dir, exist_ok=True)
+os.environ['SENTENCE_TRANSFORMERS_HOME'] = os.path.join(tempfile.gettempdir(), 'sentence_transformers')
+
+# Create all necessary cache directories
+cache_dirs = {
+    'transformers': os.environ['TRANSFORMERS_CACHE'],
+    'hf': os.environ['HF_HOME'],
+    'torch': os.environ['TORCH_HOME'],
+    'cache': os.environ['XDG_CACHE_HOME'],
+    'sentence_transformers': os.environ['SENTENCE_TRANSFORMERS_HOME']
+}
+
+for cache_name, cache_dir in cache_dirs.items():
+    try:
+        os.makedirs(cache_dir, exist_ok=True)
+        log_print(f"Created cache directory for {cache_name}: {cache_dir}")
+    except Exception as e:
+        log_print(f"Error creating {cache_name} cache directory: {e}", "ERROR")
 
 # Now import the rest of the dependencies
 import sys
-import logging
 from pathlib import Path
 from flask import Flask, request, jsonify, render_template, send_file, Response
 from werkzeug.utils import secure_filename
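Note: log_print above mirrors every log line into notification_queue "for real-time updates", but this commit does not show the consumer. Since Flask's Response is imported, a server-sent-events stream is the likely pattern; the sketch below is an assumption, and the /notifications route name and the `app` instance are illustrative, not part of this diff.

# Sketch only: an SSE endpoint draining notification_queue.
# The route name and the `app` Flask instance are assumptions.
import json
import queue
from flask import Response

@app.route('/notifications')
def notifications():
    def stream():
        while True:
            try:
                msg = notification_queue.get(timeout=30)
                yield f"data: {json.dumps(msg)}\n\n"
            except queue.Empty:
                # Comment frame keeps the connection alive through proxies.
                yield ": keep-alive\n\n"
    return Response(stream(), mimetype='text/event-stream')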
@@ -28,7 +66,6 @@ import io
 import base64
 from datetime import datetime
 import json
-import queue
 import threading
 from threading import Thread, Event
 import warnings
@@ -36,6 +73,14 @@ from flask_cors import CORS
 from dotenv import load_dotenv
 warnings.filterwarnings('ignore')
 
+# Import ML libraries
+import torch
+import nltk
+import gensim
+from gensim.models import FastText
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline
+
 # Import ML libraries with timeout protection
 def import_with_timeout(import_statement, timeout=30):
     """Import a module with a timeout to prevent hanging"""
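Only the signature and docstring of import_with_timeout appear in this hunk. A body consistent with the already-imported Thread would run the import in a daemon thread and give up after the deadline; this is a minimal sketch of that pattern under those assumptions, not the file's actual implementation.

# Minimal sketch of a timeout-guarded import, assuming the statement
# arrives as a string such as "import torch"; not the actual body.
from threading import Thread

def import_with_timeout(import_statement, timeout=30):
    """Import a module with a timeout to prevent hanging"""
    namespace = {}

    def worker():
        try:
            # exec binds the imported names into `namespace`.
            exec(import_statement, namespace)
        except Exception as e:
            namespace['_import_error'] = e

    t = Thread(target=worker, daemon=True)
    t.start()
    t.join(timeout)
    if t.is_alive():
        raise TimeoutError(f"Import timed out after {timeout}s: {import_statement}")
    if '_import_error' in namespace:
        raise namespace['_import_error']
    return namespace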
@@ -125,14 +170,6 @@ def ensure_directory(path):
         log_print(f"Error creating directory {path}: {nested_e}", "ERROR")
         raise
 
-# Simplified logging setup
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    handlers=[logging.StreamHandler(sys.stdout)]
-)
-logger = logging.getLogger(__name__)
-
 def get_or_load_model(model_name):
     """Get a model from cache or load it if not present"""
     if model_name not in global_models:
@@ -144,16 +181,84 @@ def get_or_load_model(model_name):
                 if not os.path.exists(model_path):
                     from gensim.downloader import load
                     log_print("Downloading fasttext model...")
-                    global_models[model_name] = load('fasttext-wiki-news-subwords-300')
+                    model = load('fasttext-wiki-news-subwords-300')
+                    # Move model to CPU explicitly
+                    if hasattr(model, 'to'):
+                        model = model.to('cpu')
+                    global_models[model_name] = model
                 else:
-                    global_models[model_name] = KeyedVectors.load_word2vec_format(model_path)
+                    model = KeyedVectors.load_word2vec_format(model_path)
+                    # Move model to CPU explicitly
+                    if hasattr(model, 'to'):
+                        model = model.to('cpu')
+                    global_models[model_name] = model
                 log_print(f"Successfully loaded {model_name} model")
+            elif model_name == 'vit':
+                try:
+                    from transformers import ViTImageProcessor, ViTModel
+                    log_print("Loading ViT model...")
+
+                    # Use a more common ViT model that's guaranteed to exist
+                    model_name = "google/vit-base-patch16-224-in21k"
+                    cache_dir = os.path.join(os.environ['TRANSFORMERS_CACHE'], 'vit-models')
+                    os.makedirs(cache_dir, exist_ok=True)
+
+                    try:
+                        # Try to load the processor first
+                        log_print("Loading ViT image processor...")
+                        processor = ViTImageProcessor.from_pretrained(model_name,
+                                                                      cache_dir=cache_dir,
+                                                                      local_files_only=True)
+                        log_print("Loading ViT model from cache...")
+                        model = ViTModel.from_pretrained(model_name,
+                                                         cache_dir=cache_dir,
+                                                         local_files_only=True)
+
+                        # Move model to CPU explicitly
+                        model = model.to('cpu')
+
+                        global_models['vit_processor'] = processor
+                        global_models['vit_model'] = model
+                        log_print("Successfully loaded ViT model from cache")
+
+                    except Exception as cache_error:
+                        log_print(f"Cache load failed ({str(cache_error)}), downloading model...")
+                        processor = ViTImageProcessor.from_pretrained(model_name,
+                                                                      cache_dir=cache_dir,
+                                                                      local_files_only=False)
+                        model = ViTModel.from_pretrained(model_name,
+                                                         cache_dir=cache_dir,
+                                                         local_files_only=False)
+
+                        # Move model to CPU explicitly
+                        model = model.to('cpu')
+
+                        global_models['vit_processor'] = processor
+                        global_models['vit_model'] = model
+                        log_print("Successfully downloaded and loaded ViT model")
+
+                except Exception as e:
+                    log_print(f"Error loading ViT model: {str(e)}", "ERROR")
+                    try:
+                        log_print("Trying alternative ViT model...")
+                        model_name = "google/vit-base-patch16-224"
+                        processor = ViTImageProcessor.from_pretrained(model_name)
+                        model = ViTModel.from_pretrained(model_name)
+
+                        # Move model to CPU explicitly
+                        model = model.to('cpu')
+
+                        global_models['vit_processor'] = processor
+                        global_models['vit_model'] = model
+                        log_print("Successfully loaded alternative ViT model")
+                    except Exception as alt_error:
+                        log_print(f"Error loading alternative ViT model: {str(alt_error)}", "ERROR")
+                        return None
             elif model_name == 'llm':
-                # Implement LLM model loading here
                 log_print("LLM model loading not implemented", "WARNING")
                 return None
         except Exception as e:
-            log_print(f"Error loading {model_name} model: {e}", "ERROR")
+            log_print(f"Error loading {model_name} model: {str(e)}", "ERROR")
             return None
     return global_models.get(model_name)
 
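With 'vit_processor' and 'vit_model' cached in global_models, downstream code can turn an image into a feature vector along these lines. This is a sketch using the standard transformers API; the input file name and the mean-pooling choice are illustrative assumptions, not shown in this commit.

# Illustrative use of the cached ViT pair; the input image is hypothetical.
import torch
from PIL import Image

processor = global_models['vit_processor']
model = global_models['vit_model']

image = Image.open('answer_page.png').convert('RGB')
inputs = processor(images=image, return_tensors='pt')
with torch.no_grad():
    outputs = model(**inputs)
# Mean-pool the patch embeddings into one 768-dim vector (vit-base).
embedding = outputs.last_hidden_state.mean(dim=1).squeeze(0)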
@@ -179,7 +284,11 @@ def initialize_resources():
 
     # Initialize models
     try:
+        # Load FastText first
         get_or_load_model('fasttext')
+
+        # Then load ViT model
+        get_or_load_model('vit')
     except Exception as e:
         log_print(f"Warning: Could not preload models: {e}", "WARNING")
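Since get_or_load_model logs failures and returns None rather than raising, call sites outside this preload block need their own guard. A typical usage, with the fallback message purely illustrative:

# Illustrative call site: load errors surface as None, not exceptions.
model = get_or_load_model('fasttext')
if model is None:
    log_print("fasttext unavailable; skipping similarity scoring", "WARNING")
else:
    score = model.similarity('answer', 'response')  # gensim KeyedVectors API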