yamanavijayavardhan committed on
Commit
77e1eaf
·
1 Parent(s): e31bcf4

printing extracted text11

Browse files
Files changed (1) hide show
  1. main.py +50 -37
main.py CHANGED
@@ -2,10 +2,18 @@ import os
2
  import tempfile
3
  import gc
4
  import psutil
 
5
 
6
- # Set up Hugging Face cache directory
7
  os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
8
- os.makedirs(os.environ['TRANSFORMERS_CACHE'], exist_ok=True)
 
 
 
 
 
 
 
9
 
10
  # Now import the rest of the dependencies
11
  import sys
@@ -23,49 +31,53 @@ import json
23
  import queue
24
  import threading
25
  from threading import Thread, Event
26
- import time
27
- import nltk
28
- import gensim
29
- from gensim.models import FastText
30
- from sentence_transformers import SentenceTransformer
31
- from transformers import pipeline
32
- import torch
33
- import torch.nn.functional as F
34
- from sklearn.feature_extraction.text import TfidfVectorizer
35
- from sklearn.metrics.pairwise import cosine_similarity
36
- import re
37
- import string
38
- import unicodedata
39
  import warnings
40
  from flask_cors import CORS
41
  from dotenv import load_dotenv
42
  warnings.filterwarnings('ignore')
43
 
44
- # Add the project root directory to Python path
45
- sys.path.append(os.path.dirname(os.path.abspath(__file__)))
46
-
47
- # Set up Hugging Face cache directory
48
- os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
49
- os.environ['HF_HOME'] = os.path.join(tempfile.gettempdir(), 'huggingface')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- # Create cache directory if it doesn't exist
52
- os.makedirs(os.environ['TRANSFORMERS_CACHE'], exist_ok=True)
53
- os.makedirs(os.environ['HF_HOME'], exist_ok=True)
 
54
 
55
- # Import HTR modules
56
- from HTR.app import extract_text_from_image
57
- from HTR.word import convert_image
58
- from HTR.strike import struck_images
59
- from HTR.hcr import text
60
- from HTR.spell_and_gramer_check import spell_grammer
61
 
62
- # Import utils
63
- from utils import notification_queue, log_print
 
64
 
65
- # Load environment variables
66
- load_dotenv()
67
 
68
- # Create directories in /tmp which is writable
69
  BASE_DIR = '/tmp' # Use direct /tmp path for Hugging Face
70
  log_dir = os.path.join(BASE_DIR, 'app_logs')
71
  cache_dir = os.path.join(BASE_DIR, 'app_cache')
@@ -142,7 +154,7 @@ def get_or_load_model(model_name):
142
  return None
143
  except Exception as e:
144
  log_print(f"Error loading {model_name} model: {e}", "ERROR")
145
- raise
146
  return global_models.get(model_name)
147
 
148
  def initialize_resources():
@@ -162,7 +174,8 @@ def initialize_resources():
162
  log_print(f"Downloading NLTK data: {data}")
163
  nltk.download(data, download_dir=nltk_data_dir, quiet=True)
164
  except Exception as e:
165
- log_print(f"Error downloading NLTK data {data}: {e}", "ERROR")
 
166
 
167
  # Initialize models
168
  try:
 
2
  import tempfile
3
  import gc
4
  import psutil
5
+ import time
6
 
7
+ # Set environment variables before any other imports
8
  os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
9
+ os.environ['HF_HOME'] = os.path.join(tempfile.gettempdir(), 'huggingface')
10
+ os.environ['TORCH_HOME'] = os.path.join(tempfile.gettempdir(), 'torch')
11
+ os.environ['XDG_CACHE_HOME'] = os.path.join(tempfile.gettempdir(), 'cache')
12
+
13
+ # Create cache directories
14
+ for cache_dir in [os.environ['TRANSFORMERS_CACHE'], os.environ['HF_HOME'],
15
+ os.environ['TORCH_HOME'], os.environ['XDG_CACHE_HOME']]:
16
+ os.makedirs(cache_dir, exist_ok=True)
17
 
18
  # Now import the rest of the dependencies
19
  import sys
 
31
  import queue
32
  import threading
33
  from threading import Thread, Event
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  import warnings
35
  from flask_cors import CORS
36
  from dotenv import load_dotenv
37
  warnings.filterwarnings('ignore')
38
 
39
+ # Import ML libraries with timeout protection
40
def import_with_timeout(import_statement, timeout=30):
    """Import a module in a background thread, giving up after a timeout.

    Args:
        import_statement: Name of the module to import. Dotted names such
            as ``"os.path"`` resolve to the named submodule itself. A
            non-string value is handed to ``exec`` unchanged.
        timeout: Maximum number of seconds to wait for the import to finish.

    Returns:
        A ``(module, error)`` tuple. On success ``module`` is the imported
        module and ``error`` is ``None``. On failure or timeout ``module``
        is ``None`` and ``error`` is a non-empty message string.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import importlib

    result = {'module': None, 'error': None}

    def _import():
        try:
            if isinstance(import_statement, str):
                # import_module returns the requested module even for dotted
                # names, whereas __import__('a.b') returns the package 'a'.
                result['module'] = importlib.import_module(import_statement)
            else:
                # NOTE(review): exec runs arbitrary code and binds nothing
                # the caller can see, so 'module' stays None on this path.
                # Never pass untrusted input here.
                exec(import_statement)
        except Exception as e:
            # str(e) is '' for exceptions raised without a message; fall
            # back to repr so callers testing `if error:` see the failure.
            result['error'] = str(e) or repr(e)

    # Daemon thread: a hung import must not keep the interpreter alive.
    thread = Thread(target=_import, daemon=True)
    thread.start()
    thread.join(timeout=timeout)

    if thread.is_alive():
        return None, f"Import timed out after {timeout} seconds"

    return result['module'], result['error']
63
 
64
+ # Import ML libraries safely
65
+ nltk, nltk_error = import_with_timeout('nltk')
66
+ if nltk_error:
67
+ log_print(f"Warning: NLTK import failed: {nltk_error}", "WARNING")
68
 
69
+ gensim, gensim_error = import_with_timeout('gensim')
70
+ if gensim_error:
71
+ log_print(f"Warning: Gensim import failed: {gensim_error}", "WARNING")
 
 
 
72
 
73
+ torch, torch_error = import_with_timeout('torch')
74
+ if torch_error:
75
+ log_print(f"Warning: PyTorch import failed: {torch_error}", "WARNING")
76
 
77
+ # Add the project root directory to Python path
78
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
79
 
80
+ # Create cache directory if it doesn't exist
81
  BASE_DIR = '/tmp' # Use direct /tmp path for Hugging Face
82
  log_dir = os.path.join(BASE_DIR, 'app_logs')
83
  cache_dir = os.path.join(BASE_DIR, 'app_cache')
 
154
  return None
155
  except Exception as e:
156
  log_print(f"Error loading {model_name} model: {e}", "ERROR")
157
+ return None
158
  return global_models.get(model_name)
159
 
160
  def initialize_resources():
 
174
  log_print(f"Downloading NLTK data: {data}")
175
  nltk.download(data, download_dir=nltk_data_dir, quiet=True)
176
  except Exception as e:
177
+ log_print(f"Error downloading NLTK data {data}: {e}", "WARNING")
178
+ continue
179
 
180
  # Initialize models
181
  try: