yamanavijayavardhan committed on
Commit
8dd6f8c
·
1 Parent(s): 2ee994f

update_new_new_new_new_new

Browse files
Files changed (2) hide show
  1. HTR/strike.py +22 -2
  2. HTR/word.py +68 -91
HTR/strike.py CHANGED
@@ -80,6 +80,12 @@ def struck_images(image_paths):
80
  if img is None:
81
  logger.error(f"Failed to read image: {img_path}")
82
  continue
 
 
 
 
 
 
83
 
84
  # Process the image
85
  processed = process_single_image(img)
@@ -108,8 +114,22 @@ def process_single_image(img):
108
  if len(img.shape) == 3:
109
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
110
 
111
- # Apply thresholding
112
- _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  return binary
115
 
 
80
  if img is None:
81
  logger.error(f"Failed to read image: {img_path}")
82
  continue
83
+
84
+ # Resize if image is too small
85
+ min_size = 800
86
+ if img.shape[0] < min_size or img.shape[1] < min_size:
87
+ scale = min_size / min(img.shape[0], img.shape[1])
88
+ img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
89
 
90
  # Process the image
91
  processed = process_single_image(img)
 
114
  if len(img.shape) == 3:
115
  img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
116
 
117
+ # Enhance contrast
118
+ img = cv2.equalizeHist(img)
119
+
120
+ # Denoise
121
+ img = cv2.fastNlMeansDenoising(img)
122
+
123
+ # Apply adaptive thresholding
124
+ binary = cv2.adaptiveThreshold(
125
+ img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
126
+ cv2.THRESH_BINARY, 21, 15
127
+ )
128
+
129
+ # Remove noise and smooth edges
130
+ kernel = np.ones((3,3), np.uint8)
131
+ binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
132
+ binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
133
 
134
  return binary
135
 
HTR/word.py CHANGED
@@ -144,115 +144,92 @@ def convert_image(img):
144
  logger.info("Starting image conversion...")
145
  logger.info(f"Input image shape: {img.shape}")
146
 
147
- # Ensure image is in correct format
148
  if len(img.shape) == 2: # If grayscale
149
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
150
  elif img.shape[2] == 4: # If RGBA
151
  img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
 
 
 
 
 
 
152
 
153
  img = img.astype(np.uint8)
154
  img_copy = np.copy(img)
155
- line_length = 250
156
- rect_image = img
157
-
158
- # Remove shadow
159
- logger.info("Removing shadow...")
160
- image1, image2_ = remove_shadow(rect_image)
161
-
162
- # Convert to grayscale
163
- logger.info("Converting to grayscale...")
164
- gray_ = cv2.cvtColor(image2_, cv2.COLOR_BGR2GRAY)
165
 
166
- # Convert to binary with adaptive thresholding
167
- logger.info("Converting to binary...")
168
- binary_image_ = cv2.adaptiveThreshold(
169
- gray_, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
170
- cv2.THRESH_BINARY_INV, 11, 2
171
- )
172
- inverted_binary_image_ = 255 - binary_image_
173
-
174
- binary_image1 = np.copy(inverted_binary_image_)
175
-
176
- y_height, x_width = rect_image.shape[:2]
177
- logger.info(f"Image dimensions: {x_width}x{y_height}")
178
-
179
- # Resize image
180
- new_width = 500*5
181
- new_height = 705*5
182
-
183
- x_scaling = x_width/new_width
184
- y_scaling = y_height/new_height
185
-
186
- logger.info("Resizing image...")
187
- rect_image = cv2.resize(rect_image, (new_width, new_height),
188
- interpolation=cv2.INTER_NEAREST)
189
 
190
- # Process resized image
191
- logger.info("Processing resized image...")
192
- image1, image2 = remove_shadow(rect_image)
193
- gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
194
- binary_image = cv2.adaptiveThreshold(
195
  gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
196
- cv2.THRESH_BINARY_INV, 11, 2
197
  )
198
- inverted_binary_image = 255 - binary_image
199
-
200
- # Apply morphological operations
201
- kernel = np.ones((2,2), np.uint8)
202
- erosion = cv2.erode(inverted_binary_image, kernel, iterations=1)
203
- dilation = cv2.dilate(erosion, kernel, iterations=1)
204
 
205
- new_image = np.copy(dilation)
206
- new_image = 255 - new_image
207
-
 
 
 
 
208
  # Find text regions
209
- kernel = np.ones((1,250), np.uint8)
210
- dilation_1 = cv2.dilate(dilation, kernel, iterations=2)
 
211
 
212
- contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL,
213
- cv2.CHAIN_APPROX_SIMPLE)
 
 
 
 
 
214
 
215
- line = []
216
- logger.info(f"Found {len(contours)} contours")
217
 
218
- for i in reversed(contours):
219
- x, y, w, h = cv2.boundingRect(i)
220
- if cv2.contourArea(i) < 20 or h < 10:
221
- continue
 
 
222
 
223
- cv2.drawContours(new_image, [i], -1, (0), 2)
224
- final_image_ = np.zeros_like(binary_image)
225
- cv2.drawContours(final_image_, [i], 0, (255), -1)
226
- line_image = cv2.bitwise_and(final_image_, dilation)
227
- line.extend(analise(line_image, binary_image1, x_scaling, y_scaling))
228
-
229
- count = 0
230
- kernel1 = np.ones((8,8), np.uint8)
231
- word__image = []
232
-
233
- logger.info(f"Processing {len(line)} lines")
234
- for line_image in line:
235
- dilation_2 = cv2.dilate(line_image, kernel1, iterations=2)
236
- contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL,
237
- cv2.CHAIN_APPROX_SIMPLE)
238
- sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
239
-
240
- for j in sorted_contours:
241
- x1, y1, w1, h1 = cv2.boundingRect(j)
242
- if w1 < 5 or h1 < 5: # Skip very small regions
243
- continue
244
-
245
- final_image = line_image[y1:y1+h1, x1:x1+w1]
246
- final_image = 255 - final_image
247
 
248
- # Save the word image to the temporary directory
249
- word_path = os.path.join(images_dir, f'word_{count}.png')
250
- cv2.imwrite(word_path, final_image)
251
- word__image.append(word_path)
252
- count += 1
253
-
254
- logger.info(f"Extracted {count} words")
255
- return word__image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
  except Exception as e:
258
  logger.error(f"Error in convert_image: {str(e)}")
 
144
  logger.info("Starting image conversion...")
145
  logger.info(f"Input image shape: {img.shape}")
146
 
147
+ # Ensure image is in correct format and size
148
  if len(img.shape) == 2: # If grayscale
149
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
150
  elif img.shape[2] == 4: # If RGBA
151
  img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
152
+
153
+ # Resize if image is too small
154
+ min_size = 800
155
+ if img.shape[0] < min_size or img.shape[1] < min_size:
156
+ scale = min_size / min(img.shape[0], img.shape[1])
157
+ img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
158
 
159
  img = img.astype(np.uint8)
160
  img_copy = np.copy(img)
 
 
 
 
 
 
 
 
 
 
161
 
162
+ # Enhanced preprocessing
163
+ # 1. Denoise
164
+ img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
165
+
166
+ # 2. Convert to grayscale
167
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ # 3. Apply adaptive thresholding with different parameters
170
+ binary = cv2.adaptiveThreshold(
 
 
 
171
  gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
172
+ cv2.THRESH_BINARY_INV, 21, 15
173
  )
 
 
 
 
 
 
174
 
175
+ # 4. Remove noise
176
+ kernel = np.ones((3,3), np.uint8)
177
+ binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
178
+
179
+ # 5. Dilate to connect text components
180
+ binary = cv2.dilate(binary, kernel, iterations=1)
181
+
182
  # Find text regions
183
+ contours, hierarchy = cv2.findContours(
184
+ binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
185
+ )
186
 
187
+ # Filter contours by minimum area and bounding-box size
188
+ valid_contours = []
189
+ for cnt in contours:
190
+ x, y, w, h = cv2.boundingRect(cnt)
191
+ area = cv2.contourArea(cnt)
192
+ if area > 100 and w > 10 and h > 10: # Minimum size thresholds
193
+ valid_contours.append(cnt)
194
 
195
+ logger.info(f"Found {len(valid_contours)} valid text regions")
 
196
 
197
+ # Process each text region
198
+ word_images = []
199
+ for i, cnt in enumerate(valid_contours):
200
+ try:
201
+ # Get bounding box
202
+ x, y, w, h = cv2.boundingRect(cnt)
203
 
204
+ # Add padding
205
+ padding = 10
206
+ x = max(0, x - padding)
207
+ y = max(0, y - padding)
208
+ w = min(img.shape[1] - x, w + 2*padding)
209
+ h = min(img.shape[0] - y, h + 2*padding)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
+ # Extract region
212
+ roi = gray[y:y+h, x:x+w]
213
+
214
+ # Enhance contrast
215
+ roi = cv2.equalizeHist(roi)
216
+
217
+ # Binarize
218
+ _, roi_bin = cv2.threshold(
219
+ roi, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
220
+ )
221
+
222
+ # Save the word image
223
+ word_path = os.path.join(images_dir, f'word_{i}.png')
224
+ cv2.imwrite(word_path, roi_bin)
225
+ word_images.append(word_path)
226
+
227
+ except Exception as e:
228
+ logger.error(f"Error processing contour {i}: {str(e)}")
229
+ continue
230
+
231
+ logger.info(f"Successfully extracted {len(word_images)} word images")
232
+ return word_images
233
 
234
  except Exception as e:
235
  logger.error(f"Error in convert_image: {str(e)}")