Commit
·
2d9524f
1
Parent(s):
77e1eaf
printing extracted text12
Browse files
main.py
CHANGED
@@ -3,21 +3,59 @@ import tempfile
|
|
3 |
import gc
|
4 |
import psutil
|
5 |
import time
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
# Set environment variables before any other imports
|
8 |
os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
|
9 |
os.environ['HF_HOME'] = os.path.join(tempfile.gettempdir(), 'huggingface')
|
10 |
os.environ['TORCH_HOME'] = os.path.join(tempfile.gettempdir(), 'torch')
|
11 |
os.environ['XDG_CACHE_HOME'] = os.path.join(tempfile.gettempdir(), 'cache')
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
os.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# Now import the rest of the dependencies
|
19 |
import sys
|
20 |
-
import logging
|
21 |
from pathlib import Path
|
22 |
from flask import Flask, request, jsonify, render_template, send_file, Response
|
23 |
from werkzeug.utils import secure_filename
|
@@ -28,7 +66,6 @@ import io
|
|
28 |
import base64
|
29 |
from datetime import datetime
|
30 |
import json
|
31 |
-
import queue
|
32 |
import threading
|
33 |
from threading import Thread, Event
|
34 |
import warnings
|
@@ -36,6 +73,14 @@ from flask_cors import CORS
|
|
36 |
from dotenv import load_dotenv
|
37 |
warnings.filterwarnings('ignore')
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
# Import ML libraries with timeout protection
|
40 |
def import_with_timeout(import_statement, timeout=30):
|
41 |
"""Import a module with a timeout to prevent hanging"""
|
@@ -125,14 +170,6 @@ def ensure_directory(path):
|
|
125 |
log_print(f"Error creating directory {path}: {nested_e}", "ERROR")
|
126 |
raise
|
127 |
|
128 |
-
# Simplified logging setup
|
129 |
-
logging.basicConfig(
|
130 |
-
level=logging.INFO,
|
131 |
-
format='%(asctime)s - %(levelname)s - %(message)s',
|
132 |
-
handlers=[logging.StreamHandler(sys.stdout)]
|
133 |
-
)
|
134 |
-
logger = logging.getLogger(__name__)
|
135 |
-
|
136 |
def get_or_load_model(model_name):
|
137 |
"""Get a model from cache or load it if not present"""
|
138 |
if model_name not in global_models:
|
@@ -144,16 +181,84 @@ def get_or_load_model(model_name):
|
|
144 |
if not os.path.exists(model_path):
|
145 |
from gensim.downloader import load
|
146 |
log_print("Downloading fasttext model...")
|
147 |
-
|
|
|
|
|
|
|
|
|
148 |
else:
|
149 |
-
|
|
|
|
|
|
|
|
|
150 |
log_print(f"Successfully loaded {model_name} model")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
151 |
elif model_name == 'llm':
|
152 |
-
# Implement LLM model loading here
|
153 |
log_print("LLM model loading not implemented", "WARNING")
|
154 |
return None
|
155 |
except Exception as e:
|
156 |
-
log_print(f"Error loading {model_name} model: {e}", "ERROR")
|
157 |
return None
|
158 |
return global_models.get(model_name)
|
159 |
|
@@ -179,7 +284,11 @@ def initialize_resources():
|
|
179 |
|
180 |
# Initialize models
|
181 |
try:
|
|
|
182 |
get_or_load_model('fasttext')
|
|
|
|
|
|
|
183 |
except Exception as e:
|
184 |
log_print(f"Warning: Could not preload models: {e}", "WARNING")
|
185 |
|
|
|
3 |
import gc
|
4 |
import psutil
|
5 |
import time
|
6 |
+
import logging
|
7 |
+
import queue
|
8 |
+
|
9 |
+
# Set up logging first
|
10 |
+
logging.basicConfig(
|
11 |
+
level=logging.INFO,
|
12 |
+
format='%(asctime)s - %(levelname)s - %(message)s',
|
13 |
+
handlers=[logging.StreamHandler()]
|
14 |
+
)
|
15 |
+
logger = logging.getLogger(__name__)
|
16 |
+
|
17 |
+
# Create notification queue for real-time updates
|
18 |
+
notification_queue = queue.Queue()
|
19 |
+
|
20 |
+
def log_print(message, level="INFO"):
|
21 |
+
"""Unified logging function"""
|
22 |
+
if level == "ERROR":
|
23 |
+
logger.error(message)
|
24 |
+
elif level == "WARNING":
|
25 |
+
logger.warning(message)
|
26 |
+
else:
|
27 |
+
logger.info(message)
|
28 |
+
# Also put the message in notification queue for frontend
|
29 |
+
notification_queue.put({
|
30 |
+
"type": level.lower(),
|
31 |
+
"message": message
|
32 |
+
})
|
33 |
|
34 |
# Set environment variables before any other imports
|
35 |
os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
|
36 |
os.environ['HF_HOME'] = os.path.join(tempfile.gettempdir(), 'huggingface')
|
37 |
os.environ['TORCH_HOME'] = os.path.join(tempfile.gettempdir(), 'torch')
|
38 |
os.environ['XDG_CACHE_HOME'] = os.path.join(tempfile.gettempdir(), 'cache')
|
39 |
+
os.environ['SENTENCE_TRANSFORMERS_HOME'] = os.path.join(tempfile.gettempdir(), 'sentence_transformers')
|
40 |
+
|
41 |
+
# Create all necessary cache directories
|
42 |
+
cache_dirs = {
|
43 |
+
'transformers': os.environ['TRANSFORMERS_CACHE'],
|
44 |
+
'hf': os.environ['HF_HOME'],
|
45 |
+
'torch': os.environ['TORCH_HOME'],
|
46 |
+
'cache': os.environ['XDG_CACHE_HOME'],
|
47 |
+
'sentence_transformers': os.environ['SENTENCE_TRANSFORMERS_HOME']
|
48 |
+
}
|
49 |
+
|
50 |
+
for cache_name, cache_dir in cache_dirs.items():
|
51 |
+
try:
|
52 |
+
os.makedirs(cache_dir, exist_ok=True)
|
53 |
+
log_print(f"Created cache directory for {cache_name}: {cache_dir}")
|
54 |
+
except Exception as e:
|
55 |
+
log_print(f"Error creating {cache_name} cache directory: {e}", "ERROR")
|
56 |
|
57 |
# Now import the rest of the dependencies
|
58 |
import sys
|
|
|
59 |
from pathlib import Path
|
60 |
from flask import Flask, request, jsonify, render_template, send_file, Response
|
61 |
from werkzeug.utils import secure_filename
|
|
|
66 |
import base64
|
67 |
from datetime import datetime
|
68 |
import json
|
|
|
69 |
import threading
|
70 |
from threading import Thread, Event
|
71 |
import warnings
|
|
|
73 |
from dotenv import load_dotenv
|
74 |
warnings.filterwarnings('ignore')
|
75 |
|
76 |
+
# Import ML libraries
|
77 |
+
import torch
|
78 |
+
import nltk
|
79 |
+
import gensim
|
80 |
+
from gensim.models import FastText
|
81 |
+
from sentence_transformers import SentenceTransformer
|
82 |
+
from transformers import pipeline
|
83 |
+
|
84 |
# Import ML libraries with timeout protection
|
85 |
def import_with_timeout(import_statement, timeout=30):
|
86 |
"""Import a module with a timeout to prevent hanging"""
|
|
|
170 |
log_print(f"Error creating directory {path}: {nested_e}", "ERROR")
|
171 |
raise
|
172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
def get_or_load_model(model_name):
|
174 |
"""Get a model from cache or load it if not present"""
|
175 |
if model_name not in global_models:
|
|
|
181 |
if not os.path.exists(model_path):
|
182 |
from gensim.downloader import load
|
183 |
log_print("Downloading fasttext model...")
|
184 |
+
model = load('fasttext-wiki-news-subwords-300')
|
185 |
+
# Move model to CPU explicitly
|
186 |
+
if hasattr(model, 'to'):
|
187 |
+
model = model.to('cpu')
|
188 |
+
global_models[model_name] = model
|
189 |
else:
|
190 |
+
model = KeyedVectors.load_word2vec_format(model_path)
|
191 |
+
# Move model to CPU explicitly
|
192 |
+
if hasattr(model, 'to'):
|
193 |
+
model = model.to('cpu')
|
194 |
+
global_models[model_name] = model
|
195 |
log_print(f"Successfully loaded {model_name} model")
|
196 |
+
elif model_name == 'vit':
|
197 |
+
try:
|
198 |
+
from transformers import ViTImageProcessor, ViTModel
|
199 |
+
log_print("Loading ViT model...")
|
200 |
+
|
201 |
+
# Use a more common ViT model that's guaranteed to exist
|
202 |
+
model_name = "google/vit-base-patch16-224-in21k"
|
203 |
+
cache_dir = os.path.join(os.environ['TRANSFORMERS_CACHE'], 'vit-models')
|
204 |
+
os.makedirs(cache_dir, exist_ok=True)
|
205 |
+
|
206 |
+
try:
|
207 |
+
# Try to load the processor first
|
208 |
+
log_print("Loading ViT image processor...")
|
209 |
+
processor = ViTImageProcessor.from_pretrained(model_name,
|
210 |
+
cache_dir=cache_dir,
|
211 |
+
local_files_only=True)
|
212 |
+
log_print("Loading ViT model from cache...")
|
213 |
+
model = ViTModel.from_pretrained(model_name,
|
214 |
+
cache_dir=cache_dir,
|
215 |
+
local_files_only=True)
|
216 |
+
|
217 |
+
# Move model to CPU explicitly
|
218 |
+
model = model.to('cpu')
|
219 |
+
|
220 |
+
global_models['vit_processor'] = processor
|
221 |
+
global_models['vit_model'] = model
|
222 |
+
log_print("Successfully loaded ViT model from cache")
|
223 |
+
|
224 |
+
except Exception as cache_error:
|
225 |
+
log_print(f"Cache load failed ({str(cache_error)}), downloading model...")
|
226 |
+
processor = ViTImageProcessor.from_pretrained(model_name,
|
227 |
+
cache_dir=cache_dir,
|
228 |
+
local_files_only=False)
|
229 |
+
model = ViTModel.from_pretrained(model_name,
|
230 |
+
cache_dir=cache_dir,
|
231 |
+
local_files_only=False)
|
232 |
+
|
233 |
+
# Move model to CPU explicitly
|
234 |
+
model = model.to('cpu')
|
235 |
+
|
236 |
+
global_models['vit_processor'] = processor
|
237 |
+
global_models['vit_model'] = model
|
238 |
+
log_print("Successfully downloaded and loaded ViT model")
|
239 |
+
|
240 |
+
except Exception as e:
|
241 |
+
log_print(f"Error loading ViT model: {str(e)}", "ERROR")
|
242 |
+
try:
|
243 |
+
log_print("Trying alternative ViT model...")
|
244 |
+
model_name = "google/vit-base-patch16-224"
|
245 |
+
processor = ViTImageProcessor.from_pretrained(model_name)
|
246 |
+
model = ViTModel.from_pretrained(model_name)
|
247 |
+
|
248 |
+
# Move model to CPU explicitly
|
249 |
+
model = model.to('cpu')
|
250 |
+
|
251 |
+
global_models['vit_processor'] = processor
|
252 |
+
global_models['vit_model'] = model
|
253 |
+
log_print("Successfully loaded alternative ViT model")
|
254 |
+
except Exception as alt_error:
|
255 |
+
log_print(f"Error loading alternative ViT model: {str(alt_error)}", "ERROR")
|
256 |
+
return None
|
257 |
elif model_name == 'llm':
|
|
|
258 |
log_print("LLM model loading not implemented", "WARNING")
|
259 |
return None
|
260 |
except Exception as e:
|
261 |
+
log_print(f"Error loading {model_name} model: {str(e)}", "ERROR")
|
262 |
return None
|
263 |
return global_models.get(model_name)
|
264 |
|
|
|
284 |
|
285 |
# Initialize models
|
286 |
try:
|
287 |
+
# Load FastText first
|
288 |
get_or_load_model('fasttext')
|
289 |
+
|
290 |
+
# Then load ViT model
|
291 |
+
get_or_load_model('vit')
|
292 |
except Exception as e:
|
293 |
log_print(f"Warning: Could not preload models: {e}", "WARNING")
|
294 |
|