Spaces:

yamanavijayavardhan
/

answer-grading-app

Sleeping

App Files Files Community

yamanavijayavardhan committed on Apr 2

Commit

8dd6f8c

1 Parent(s): 2ee994f

update_new_new_new_new_new

Browse files

Files changed (2) hide show

HTR/strike.py +22 -2
HTR/word.py +68 -91

HTR/strike.py CHANGED Viewed

@@ -80,6 +80,12 @@ def struck_images(image_paths):
                 if img is None:
                     logger.error(f"Failed to read image: {img_path}")
                     continue
                 # Process the image
                 processed = process_single_image(img)
@@ -108,8 +114,22 @@ def process_single_image(img):
         if len(img.shape) == 3:
             img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # Apply thresholding
-        _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)
         return binary

                 if img is None:
                     logger.error(f"Failed to read image: {img_path}")
                     continue
+                # Resize if image is too small
+                min_size = 800
+                if img.shape[0] < min_size or img.shape[1] < min_size:
+                    scale = min_size / min(img.shape[0], img.shape[1])
+                    img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
                 # Process the image
                 processed = process_single_image(img)
         if len(img.shape) == 3:
             img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # Enhance contrast
+        img = cv2.equalizeHist(img)
+        # Denoise
+        img = cv2.fastNlMeansDenoising(img)
+        # Apply adaptive thresholding
+        binary = cv2.adaptiveThreshold(
+            img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY, 21, 15
+        )
+        # Remove noise and smooth edges
+        kernel = np.ones((3,3), np.uint8)
+        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
+        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
         return binary

HTR/word.py CHANGED Viewed

@@ -144,115 +144,92 @@ def convert_image(img):
         logger.info("Starting image conversion...")
         logger.info(f"Input image shape: {img.shape}")
-        # Ensure image is in correct format
         if len(img.shape) == 2:  # If grayscale
             img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
         elif img.shape[2] == 4:  # If RGBA
             img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
         img = img.astype(np.uint8)
         img_copy = np.copy(img)
-        line_length = 250
-        rect_image = img
-        # Remove shadow
-        logger.info("Removing shadow...")
-        image1, image2_ = remove_shadow(rect_image)
-        # Convert to grayscale
-        logger.info("Converting to grayscale...")
-        gray_ = cv2.cvtColor(image2_, cv2.COLOR_BGR2GRAY)
-        # Convert to binary with adaptive thresholding
-        logger.info("Converting to binary...")
-        binary_image_ = cv2.adaptiveThreshold(
-            gray_, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-            cv2.THRESH_BINARY_INV, 11, 2
-        )
-        inverted_binary_image_ = 255 - binary_image_
-        binary_image1 = np.copy(inverted_binary_image_)
-        y_height, x_width = rect_image.shape[:2]
-        logger.info(f"Image dimensions: {x_width}x{y_height}")
-        # Resize image
-        new_width = 500*5
-        new_height = 705*5
-        x_scaling = x_width/new_width
-        y_scaling = y_height/new_height
-        logger.info("Resizing image...")
-        rect_image = cv2.resize(rect_image, (new_width, new_height),
-                              interpolation=cv2.INTER_NEAREST)
-        # Process resized image
-        logger.info("Processing resized image...")
-        image1, image2 = remove_shadow(rect_image)
-        gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
-        binary_image = cv2.adaptiveThreshold(
             gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-            cv2.THRESH_BINARY_INV, 11, 2
         )
-        inverted_binary_image = 255 - binary_image
-        # Apply morphological operations
-        kernel = np.ones((2,2), np.uint8)
-        erosion = cv2.erode(inverted_binary_image, kernel, iterations=1)
-        dilation = cv2.dilate(erosion, kernel, iterations=1)
-        new_image = np.copy(dilation)
-        new_image = 255 - new_image
         # Find text regions
-        kernel = np.ones((1,250), np.uint8)
-        dilation_1 = cv2.dilate(dilation, kernel, iterations=2)
-        contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL,
-                                     cv2.CHAIN_APPROX_SIMPLE)
-        line = []
-        logger.info(f"Found {len(contours)} contours")
-        for i in reversed(contours):
-            x, y, w, h = cv2.boundingRect(i)
-            if cv2.contourArea(i) < 20 or h < 10:
-                continue
-            cv2.drawContours(new_image, [i], -1, (0), 2)
-            final_image_ = np.zeros_like(binary_image)
-            cv2.drawContours(final_image_, [i], 0, (255), -1)
-            line_image = cv2.bitwise_and(final_image_, dilation)
-            line.extend(analise(line_image, binary_image1, x_scaling, y_scaling))
-        count = 0
-        kernel1 = np.ones((8,8), np.uint8)
-        word__image = []
-        logger.info(f"Processing {len(line)} lines")
-        for line_image in line:
-            dilation_2 = cv2.dilate(line_image, kernel1, iterations=2)
-            contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL,
-                                          cv2.CHAIN_APPROX_SIMPLE)
-            sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
-            for j in sorted_contours:
-                x1, y1, w1, h1 = cv2.boundingRect(j)
-                if w1 < 5 or h1 < 5:  # Skip very small regions
-                    continue
-                final_image = line_image[y1:y1+h1, x1:x1+w1]
-                final_image = 255 - final_image
-                # Save the word image to the temporary directory
-                word_path = os.path.join(images_dir, f'word_{count}.png')
-                cv2.imwrite(word_path, final_image)
-                word__image.append(word_path)
-                count += 1
-        logger.info(f"Extracted {count} words")
-        return word__image
     except Exception as e:
         logger.error(f"Error in convert_image: {str(e)}")

         logger.info("Starting image conversion...")
         logger.info(f"Input image shape: {img.shape}")
+        # Ensure image is in correct format and size
         if len(img.shape) == 2:  # If grayscale
             img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
         elif img.shape[2] == 4:  # If RGBA
             img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
+        # Resize if image is too small
+        min_size = 800
+        if img.shape[0] < min_size or img.shape[1] < min_size:
+            scale = min_size / min(img.shape[0], img.shape[1])
+            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
         img = img.astype(np.uint8)
         img_copy = np.copy(img)
+        # Enhanced preprocessing
+        # 1. Denoise
+        img = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 21)
+        # 2. Convert to grayscale
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # 3. Apply adaptive thresholding with different parameters
+        binary = cv2.adaptiveThreshold(
             gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY_INV, 21, 15
         )
+        # 4. Remove noise
+        kernel = np.ones((3,3), np.uint8)
+        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
+        # 5. Dilate to connect text components
+        binary = cv2.dilate(binary, kernel, iterations=1)
         # Find text regions
+        contours, hierarchy = cv2.findContours(
+            binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+        )
+        # Filter and sort contours by area
+        valid_contours = []
+        for cnt in contours:
+            x, y, w, h = cv2.boundingRect(cnt)
+            area = cv2.contourArea(cnt)
+            if area > 100 and w > 10 and h > 10:  # Minimum size thresholds
+                valid_contours.append(cnt)
+        logger.info(f"Found {len(valid_contours)} valid text regions")
+        # Process each text region
+        word_images = []
+        for i, cnt in enumerate(valid_contours):
+            try:
+                # Get bounding box
+                x, y, w, h = cv2.boundingRect(cnt)
+                # Add padding
+                padding = 10
+                x = max(0, x - padding)
+                y = max(0, y - padding)
+                w = min(img.shape[1] - x, w + 2*padding)
+                h = min(img.shape[0] - y, h + 2*padding)
+                # Extract region
+                roi = gray[y:y+h, x:x+w]
+                # Enhance contrast
+                roi = cv2.equalizeHist(roi)
+                # Binarize
+                _, roi_bin = cv2.threshold(
+                    roi, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
+                )
+                # Save the word image
+                word_path = os.path.join(images_dir, f'word_{i}.png')
+                cv2.imwrite(word_path, roi_bin)
+                word_images.append(word_path)
+            except Exception as e:
+                logger.error(f"Error processing contour {i}: {str(e)}")
+                continue
+        logger.info(f"Successfully extracted {len(word_images)} word images")
+        return word_images
     except Exception as e:
         logger.error(f"Error in convert_image: {str(e)}")