Commit · 8dd6f8c
1 Parent(s): 2ee994f
update_new_new_new_new_new
Browse files
- HTR/strike.py +22 -2
- HTR/word.py +68 -91
HTR/strike.py
CHANGED
@@ -80,6 +80,12 @@ def struck_images(image_paths):
|
|
80 |
if img is None:
|
81 |
logger.error(f"Failed to read image: {img_path}")
|
82 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
# Process the image
|
85 |
processed = process_single_image(img)
|
@@ -108,8 +114,22 @@ def process_single_image(img):
|
|
108 |
if len(img.shape) == 3:
|
109 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
110 |
|
111 |
-
#
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
return binary
|
115 |
|
|
|
80 |
if img is None:
|
81 |
logger.error(f"Failed to read image: {img_path}")
|
82 |
continue
|
83 |
+
|
84 |
+
# Resize if image is too small
|
85 |
+
min_size = 800
|
86 |
+
if img.shape[0] < min_size or img.shape[1] < min_size:
|
87 |
+
scale = min_size / min(img.shape[0], img.shape[1])
|
88 |
+
img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
|
89 |
|
90 |
# Process the image
|
91 |
processed = process_single_image(img)
|
|
|
114 |
if len(img.shape) == 3:
|
115 |
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
116 |
|
117 |
+
# Enhance contrast
|
118 |
+
img = cv2.equalizeHist(img)
|
119 |
+
|
120 |
+
# Denoise
|
121 |
+
img = cv2.fastNlMeansDenoising(img)
|
122 |
+
|
123 |
+
# Apply adaptive thresholding
|
124 |
+
binary = cv2.adaptiveThreshold(
|
125 |
+
img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
126 |
+
cv2.THRESH_BINARY, 21, 15
|
127 |
+
)
|
128 |
+
|
129 |
+
# Remove noise and smooth edges
|
130 |
+
kernel = np.ones((3,3), np.uint8)
|
131 |
+
binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
|
132 |
+
binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
|
133 |
|
134 |
return binary
|
135 |
|
HTR/word.py
CHANGED
@@ -144,115 +144,92 @@ def convert_image(img):
|
|
144 |
logger.info("Starting image conversion...")
|
145 |
logger.info(f"Input image shape: {img.shape}")
|
146 |
|
147 |
-
# Ensure image is in correct format
|
148 |
if len(img.shape) == 2: # If grayscale
|
149 |
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
150 |
elif img.shape[2] == 4: # If RGBA
|
151 |
img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
|
153 |
img = img.astype(np.uint8)
|
154 |
img_copy = np.copy(img)
|
155 |
-
line_length = 250
|
156 |
-
rect_image = img
|
157 |
-
|
158 |
-
# Remove shadow
|
159 |
-
logger.info("Removing shadow...")
|
160 |
-
image1, image2_ = remove_shadow(rect_image)
|
161 |
-
|
162 |
-
# Convert to grayscale
|
163 |
-
logger.info("Converting to grayscale...")
|
164 |
-
gray_ = cv2.cvtColor(image2_, cv2.COLOR_BGR2GRAY)
|
165 |
|
166 |
-
#
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
)
|
172 |
-
inverted_binary_image_ = 255 - binary_image_
|
173 |
-
|
174 |
-
binary_image1 = np.copy(inverted_binary_image_)
|
175 |
-
|
176 |
-
y_height, x_width = rect_image.shape[:2]
|
177 |
-
logger.info(f"Image dimensions: {x_width}x{y_height}")
|
178 |
-
|
179 |
-
# Resize image
|
180 |
-
new_width = 500*5
|
181 |
-
new_height = 705*5
|
182 |
-
|
183 |
-
x_scaling = x_width/new_width
|
184 |
-
y_scaling = y_height/new_height
|
185 |
-
|
186 |
-
logger.info("Resizing image...")
|
187 |
-
rect_image = cv2.resize(rect_image, (new_width, new_height),
|
188 |
-
interpolation=cv2.INTER_NEAREST)
|
189 |
|
190 |
-
#
|
191 |
-
|
192 |
-
image1, image2 = remove_shadow(rect_image)
|
193 |
-
gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
|
194 |
-
binary_image = cv2.adaptiveThreshold(
|
195 |
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
196 |
-
cv2.THRESH_BINARY_INV,
|
197 |
)
|
198 |
-
inverted_binary_image = 255 - binary_image
|
199 |
-
|
200 |
-
# Apply morphological operations
|
201 |
-
kernel = np.ones((2,2), np.uint8)
|
202 |
-
erosion = cv2.erode(inverted_binary_image, kernel, iterations=1)
|
203 |
-
dilation = cv2.dilate(erosion, kernel, iterations=1)
|
204 |
|
205 |
-
|
206 |
-
|
207 |
-
|
|
|
|
|
|
|
|
|
208 |
# Find text regions
|
209 |
-
|
210 |
-
|
|
|
211 |
|
212 |
-
|
213 |
-
|
|
|
|
|
|
|
|
|
|
|
214 |
|
215 |
-
|
216 |
-
logger.info(f"Found {len(contours)} contours")
|
217 |
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
|
|
|
|
222 |
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
count = 0
|
230 |
-
kernel1 = np.ones((8,8), np.uint8)
|
231 |
-
word__image = []
|
232 |
-
|
233 |
-
logger.info(f"Processing {len(line)} lines")
|
234 |
-
for line_image in line:
|
235 |
-
dilation_2 = cv2.dilate(line_image, kernel1, iterations=2)
|
236 |
-
contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL,
|
237 |
-
cv2.CHAIN_APPROX_SIMPLE)
|
238 |
-
sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
|
239 |
-
|
240 |
-
for j in sorted_contours:
|
241 |
-
x1, y1, w1, h1 = cv2.boundingRect(j)
|
242 |
-
if w1 < 5 or h1 < 5: # Skip very small regions
|
243 |
-
continue
|
244 |
-
|
245 |
-
final_image = line_image[y1:y1+h1, x1:x1+w1]
|
246 |
-
final_image = 255 - final_image
|
247 |
|
248 |
-
#
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
256 |
|
257 |
except Exception as e:
|
258 |
logger.error(f"Error in convert_image: {str(e)}")
|
|
|
144 |
logger.info("Starting image conversion...")
|
145 |
logger.info(f"Input image shape: {img.shape}")
|
146 |
|
147 |
+
# Ensure image is in correct format and size
|
148 |
if len(img.shape) == 2: # If grayscale
|
149 |
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
|
150 |
elif img.shape[2] == 4: # If RGBA
|
151 |
img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
|
152 |
+
|
153 |
+
# Resize if image is too small
|
154 |
+
min_size = 800
|
155 |
+
if img.shape[0] < min_size or img.shape[1] < min_size:
|
156 |
+
scale = min_size / min(img.shape[0], img.shape[1])
|
157 |
+
img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
|
158 |
|
159 |
img = img.astype(np.uint8)
|
160 |
img_copy = np.copy(img)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
|
162 |
+
# Enhanced preprocessing
|
163 |
+
# 1. Denoise
|
164 |
+
img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
|
165 |
+
|
166 |
+
# 2. Convert to grayscale
|
167 |
+
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
|
169 |
+
# 3. Apply adaptive thresholding with different parameters
|
170 |
+
binary = cv2.adaptiveThreshold(
|
|
|
|
|
|
|
171 |
gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
172 |
+
cv2.THRESH_BINARY_INV, 21, 15
|
173 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
|
175 |
+
# 4. Remove noise
|
176 |
+
kernel = np.ones((3,3), np.uint8)
|
177 |
+
binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
|
178 |
+
|
179 |
+
# 5. Dilate to connect text components
|
180 |
+
binary = cv2.dilate(binary, kernel, iterations=1)
|
181 |
+
|
182 |
# Find text regions
|
183 |
+
contours, hierarchy = cv2.findContours(
|
184 |
+
binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
|
185 |
+
)
|
186 |
|
187 |
+
# Filter and sort contours by area
|
188 |
+
valid_contours = []
|
189 |
+
for cnt in contours:
|
190 |
+
x, y, w, h = cv2.boundingRect(cnt)
|
191 |
+
area = cv2.contourArea(cnt)
|
192 |
+
if area > 100 and w > 10 and h > 10: # Minimum size thresholds
|
193 |
+
valid_contours.append(cnt)
|
194 |
|
195 |
+
logger.info(f"Found {len(valid_contours)} valid text regions")
|
|
|
196 |
|
197 |
+
# Process each text region
|
198 |
+
word_images = []
|
199 |
+
for i, cnt in enumerate(valid_contours):
|
200 |
+
try:
|
201 |
+
# Get bounding box
|
202 |
+
x, y, w, h = cv2.boundingRect(cnt)
|
203 |
|
204 |
+
# Add padding
|
205 |
+
padding = 10
|
206 |
+
x = max(0, x - padding)
|
207 |
+
y = max(0, y - padding)
|
208 |
+
w = min(img.shape[1] - x, w + 2*padding)
|
209 |
+
h = min(img.shape[0] - y, h + 2*padding)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
210 |
|
211 |
+
# Extract region
|
212 |
+
roi = gray[y:y+h, x:x+w]
|
213 |
+
|
214 |
+
# Enhance contrast
|
215 |
+
roi = cv2.equalizeHist(roi)
|
216 |
+
|
217 |
+
# Binarize
|
218 |
+
_, roi_bin = cv2.threshold(
|
219 |
+
roi, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
|
220 |
+
)
|
221 |
+
|
222 |
+
# Save the word image
|
223 |
+
word_path = os.path.join(images_dir, f'word_{i}.png')
|
224 |
+
cv2.imwrite(word_path, roi_bin)
|
225 |
+
word_images.append(word_path)
|
226 |
+
|
227 |
+
except Exception as e:
|
228 |
+
logger.error(f"Error processing contour {i}: {str(e)}")
|
229 |
+
continue
|
230 |
+
|
231 |
+
logger.info(f"Successfully extracted {len(word_images)} word images")
|
232 |
+
return word_images
|
233 |
|
234 |
except Exception as e:
|
235 |
logger.error(f"Error in convert_image: {str(e)}")
|