yamanavijayavardhan committed on
Commit
3885e21
·
1 Parent(s): 7acb8dd

printing extracted text17

Browse files
Files changed (2) hide show
  1. HTR/app.py +4 -7
  2. HTR/strike.py +49 -32
HTR/app.py CHANGED
@@ -89,16 +89,13 @@ def extract_text_from_image(img_path):
89
  imgs = convert_image(img)
90
  if not imgs:
91
  log_print("No text regions detected, processing whole image...", "WARNING")
92
- # Try processing the whole image as one region
93
- temp_path = os.path.join(tempfile.gettempdir(), 'whole_image.png')
94
- cv2.imwrite(temp_path, img)
95
- imgs = [temp_path]
96
 
97
  log_print(f"Found {len(imgs)} text regions")
98
 
99
  log_print("Processing text regions...")
100
- images_path = struck_images(imgs)
101
- if not images_path:
102
  error_msg = "No valid text regions after processing"
103
  log_print(error_msg, "ERROR")
104
  notification_queue.put({
@@ -108,7 +105,7 @@ def extract_text_from_image(img_path):
108
  return ""
109
 
110
  log_print("Extracting text from regions...")
111
- t = text(images_path)
112
  if not t:
113
  error_msg = "No text could be extracted from image"
114
  log_print(error_msg, "ERROR")
 
89
  imgs = convert_image(img)
90
  if not imgs:
91
  log_print("No text regions detected, processing whole image...", "WARNING")
92
+ imgs = [img]
 
 
 
93
 
94
  log_print(f"Found {len(imgs)} text regions")
95
 
96
  log_print("Processing text regions...")
97
+ processed_images = struck_images(imgs)
98
+ if not processed_images:
99
  error_msg = "No valid text regions after processing"
100
  log_print(error_msg, "ERROR")
101
  notification_queue.put({
 
105
  return ""
106
 
107
  log_print("Extracting text from regions...")
108
+ t = text(processed_images)
109
  if not t:
110
  error_msg = "No text could be extracted from image"
111
  log_print(error_msg, "ERROR")
HTR/strike.py CHANGED
@@ -23,7 +23,7 @@ try:
23
 
24
  # Get the absolute path to the model
25
  current_dir = os.path.dirname(os.path.abspath(__file__))
26
- project_root = os.path.dirname(os.path.dirname(current_dir))
27
  model_path = os.path.join(project_root, "models", "vit-base-beans")
28
 
29
  # Check if model path exists and has proper permissions
@@ -37,7 +37,7 @@ try:
37
  raise PermissionError(f"No read permission for model path: {model_path}")
38
 
39
  # Check for required model files
40
- required_files = ['config.json', 'pytorch_model.bin']
41
  for file in required_files:
42
  file_path = os.path.join(model_path, file)
43
  if not os.path.exists(file_path):
@@ -49,9 +49,13 @@ try:
49
 
50
  logger.info(f"Loading model from: {model_path}")
51
 
52
- # Load model from local path
53
  config = AutoConfig.from_pretrained(model_path)
54
- model = AutoModelForImageClassification.from_pretrained(model_path)
 
 
 
 
55
 
56
  if torch.cuda.is_available():
57
  model = model.to('cuda')
@@ -88,42 +92,55 @@ def image_preprocessing(image):
88
  logger.error(f"Error in image_preprocessing: {str(e)}")
89
  return None
90
 
91
- def predict_image(image_paths, model):
92
  try:
93
- preprocessed_img = image_preprocessing(image_path)
94
- images = torch.stack(preprocessed_img)
95
- images = images.permute(0, 3, 1, 2)
96
- predictions = model(images).logits.detach().numpy()
 
 
 
 
 
 
 
 
 
 
97
  return predictions
98
 
99
  except Exception as e:
100
  logger.error(f"Error in predict_image: {str(e)}")
101
- return process_without_model(image_paths)
102
 
 
 
 
 
103
 
104
- def struck_images(word__image):
105
- # folder_path = 'images'
106
- # images_path = []
107
- # for filename in os.listdir(folder_path):
108
- # file_path = os.path.join(folder_path, filename)
109
- # images_path.append(file_path)
110
- # # print()
111
- # images_path.sort(key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
112
-
113
- # # print(images_path)
114
-
115
- # # images_path = images_path[:2]
116
  try:
117
- predictions = predict_image(word__image, model)
118
-
119
- not_struck =[]
 
 
 
 
 
 
 
120
  for i in range(len(predictions)):
121
- if predictions[i].argmax().item() == 0:
122
- # not_struck.append(images_path[i])
123
- not_struck.append(word__image[i])
124
-
125
- # print(not_struck)
 
 
126
  return not_struck
 
127
  except Exception as e:
128
- logger.error(f"Error in process_without_model: {str(e)}")
129
- return None # Return all as not struck
 
23
 
24
  # Get the absolute path to the model
25
  current_dir = os.path.dirname(os.path.abspath(__file__))
26
+ project_root = os.path.dirname(current_dir) # Changed to one level up
27
  model_path = os.path.join(project_root, "models", "vit-base-beans")
28
 
29
  # Check if model path exists and has proper permissions
 
37
  raise PermissionError(f"No read permission for model path: {model_path}")
38
 
39
  # Check for required model files
40
+ required_files = ['config.json', 'model.safetensors'] # Updated for safetensors
41
  for file in required_files:
42
  file_path = os.path.join(model_path, file)
43
  if not os.path.exists(file_path):
 
49
 
50
  logger.info(f"Loading model from: {model_path}")
51
 
52
+ # Load model from local path with safetensors support
53
  config = AutoConfig.from_pretrained(model_path)
54
+ model = AutoModelForImageClassification.from_pretrained(
55
+ model_path,
56
+ local_files_only=True,
57
+ use_safetensors=True
58
+ )
59
 
60
  if torch.cuda.is_available():
61
  model = model.to('cuda')
 
92
  logger.error(f"Error in image_preprocessing: {str(e)}")
93
  return None
94
 
95
+ def predict_image(images, model):
96
  try:
97
+ preprocessed_img = image_preprocessing(images)
98
+ if preprocessed_img is None:
99
+ logger.error("Image preprocessing failed")
100
+ return None
101
+
102
+ images_tensor = torch.stack(preprocessed_img)
103
+ images_tensor = images_tensor.permute(0, 3, 1, 2)
104
+
105
+ with torch.no_grad():
106
+ predictions = model(images_tensor).logits
107
+ if torch.cuda.is_available():
108
+ predictions = predictions.cpu()
109
+ predictions = predictions.numpy()
110
+
111
  return predictions
112
 
113
  except Exception as e:
114
  logger.error(f"Error in predict_image: {str(e)}")
115
+ return None
116
 
117
+ def process_without_model(images):
118
+ """Fallback function when model prediction fails"""
119
+ logger.warning("Processing without model - returning all images as not struck")
120
+ return images # Return all images as not struck
121
 
122
+ def struck_images(word_images):
 
 
 
 
 
 
 
 
 
 
 
123
  try:
124
+ if model is None:
125
+ logger.warning("Model not initialized, processing without model")
126
+ return word_images
127
+
128
+ predictions = predict_image(word_images, model)
129
+ if predictions is None:
130
+ logger.warning("Predictions failed, processing without model")
131
+ return word_images
132
+
133
+ not_struck = []
134
  for i in range(len(predictions)):
135
+ if predictions[i].argmax() == 0: # Assuming 0 is the "not struck" class
136
+ not_struck.append(word_images[i])
137
+
138
+ if not not_struck:
139
+ logger.warning("No non-struck images found, returning all images")
140
+ return word_images
141
+
142
  return not_struck
143
+
144
  except Exception as e:
145
+ logger.error(f"Error in struck_images: {str(e)}")
146
+ return word_images # Return all images on error