yamanavijayavardhan committed on
Commit
2ad0600
·
1 Parent(s): 26f855a

update_new_new_new

Browse files
Files changed (3) hide show
  1. HTR/app.py +70 -4
  2. HTR/word.py +118 -169
  3. main.py +26 -26
HTR/app.py CHANGED
@@ -1,33 +1,99 @@
1
  import cv2
2
  import os
3
  import tempfile
 
 
 
 
 
 
4
 
5
  from HTR.word import convert_image
6
  from HTR.strike import struck_images
7
  from HTR.hcr import text
8
  from HTR.spell_and_gramer_check import spell_grammer
9
 
10
- # Define a function to extract text from an image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def extract_text_from_image(img_path):
12
  try:
13
  # Ensure the image exists
14
  if not os.path.exists(img_path):
15
- raise FileNotFoundError(f"Image file not found: {img_path}")
 
16
 
17
  # Read the image
18
  img = cv2.imread(img_path)
19
  if img is None:
20
- raise ValueError(f"Failed to read image: {img_path}")
 
 
 
 
 
 
 
 
 
 
 
 
21
 
 
 
 
 
 
 
 
22
  # Process the image
 
23
  imgs = convert_image(img)
 
 
 
 
 
 
 
24
  images_path = struck_images(imgs)
 
 
 
 
 
25
  t = text(images_path)
 
 
 
 
 
26
  t = spell_grammer(t)
27
 
 
28
  return t
 
29
  except Exception as e:
30
- print(f"Error in extract_text_from_image: {str(e)}")
31
  return ""
32
 
33
  # extract_text_from_image("ans_image/1.jpg")
 
1
  import cv2
2
  import os
3
  import tempfile
4
+ import logging
5
+ import numpy as np
6
+
7
+ # Set up logging
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
 
11
  from HTR.word import convert_image
12
  from HTR.strike import struck_images
13
  from HTR.hcr import text
14
  from HTR.spell_and_gramer_check import spell_grammer
15
 
16
def preprocess_image(img):
    """Binarise and denoise a BGR image before text segmentation.

    The image is converted to grayscale, thresholded with a Gaussian
    adaptive threshold in inverse-binary mode, denoised with non-local
    means, and expanded back to three channels.

    Args:
        img: Image array accepted by ``cv2.cvtColor`` with
            ``COLOR_BGR2GRAY``.

    Returns:
        The processed 3-channel image, or ``img`` unchanged when any
        step raises.
    """
    # Neighbourhood size and the constant subtracted from the weighted mean.
    block_size = 11
    mean_offset = 2

    try:
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        thresholded = cv2.adaptiveThreshold(
            grayscale,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            block_size,
            mean_offset,
        )
        cleaned = cv2.fastNlMeansDenoising(thresholded)
        processed = cv2.cvtColor(cleaned, cv2.COLOR_GRAY2BGR)
    except Exception as e:
        # Best effort: fall back to the untouched input.
        logger.error(f"Error in preprocess_image: {str(e)}")
        return img

    return processed
36
+
37
  def extract_text_from_image(img_path):
38
  try:
39
  # Ensure the image exists
40
  if not os.path.exists(img_path):
41
+ logger.error(f"Image file not found: {img_path}")
42
+ return ""
43
 
44
  # Read the image
45
  img = cv2.imread(img_path)
46
  if img is None:
47
+ logger.error(f"Failed to read image: {img_path}")
48
+ return ""
49
+
50
+ # Log image properties
51
+ logger.info(f"Processing image: {img_path}")
52
+ logger.info(f"Image shape: {img.shape}")
53
+ logger.info(f"Image dtype: {img.dtype}")
54
+
55
+ # Convert image to RGB if needed
56
+ if len(img.shape) == 2: # If grayscale
57
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
58
+ elif img.shape[2] == 4: # If RGBA
59
+ img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
60
 
61
+ # Ensure image is in correct format
62
+ img = img.astype(np.uint8)
63
+
64
+ # Preprocess image
65
+ logger.info("Preprocessing image...")
66
+ img = preprocess_image(img)
67
+
68
  # Process the image
69
+ logger.info("Converting image...")
70
  imgs = convert_image(img)
71
+ if not imgs:
72
+ logger.error("No text regions detected in image")
73
+ return ""
74
+
75
+ logger.info(f"Found {len(imgs)} text regions")
76
+
77
+ logger.info("Processing struck images...")
78
  images_path = struck_images(imgs)
79
+ if not images_path:
80
+ logger.error("No valid text regions after strike processing")
81
+ return ""
82
+
83
+ logger.info("Extracting text...")
84
  t = text(images_path)
85
+ if not t:
86
+ logger.error("No text extracted from image")
87
+ return ""
88
+
89
+ logger.info("Spell checking...")
90
  t = spell_grammer(t)
91
 
92
+ logger.info(f"Extracted text: {t}")
93
  return t
94
+
95
  except Exception as e:
96
+ logger.error(f"Error in extract_text_from_image: {str(e)}")
97
  return ""
98
 
99
  # extract_text_from_image("ans_image/1.jpg")
HTR/word.py CHANGED
@@ -4,7 +4,11 @@ import cv2
4
  import sys
5
  import os
6
  import tempfile
 
7
 
 
 
 
8
 
9
  cordinates =[]
10
 
@@ -38,21 +42,22 @@ def four_point_transform(image, pts):
38
 
39
  return warped
40
  except Exception as e:
41
- print(f"Error in four_point_transform: {str(e)}")
42
  return image
43
 
44
 
45
  def remove_shadow(image):
46
  try:
47
  rgb_planes = cv2.split(image)
48
-
49
  result_planes = []
50
  result_norm_planes = []
 
51
  for plane in rgb_planes:
52
  dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
53
  bg_img = cv2.medianBlur(dilated_img, 21)
54
  diff_img = 255 - cv2.absdiff(plane, bg_img)
55
- norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
 
56
  result_planes.append(diff_img)
57
  result_norm_planes.append(norm_img)
58
 
@@ -61,70 +66,58 @@ def remove_shadow(image):
61
 
62
  return result, result_norm
63
  except Exception as e:
64
- print(f"Error in remove_shadow: {str(e)}")
65
  return image, image
66
 
67
 
68
 
69
 
70
- def analise(image):
71
- global line, binary_image1, x_scaling , y_scaling
72
  try:
73
- kernel = np.ones((1,250),np.uint8)
74
-
75
- dilation = cv2.dilate(image, kernel, iterations = 2)
76
-
77
- # cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
78
- # cv2.imshow('Image',dilation)
79
- # cv2.waitKey(0)
80
 
81
  contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
82
 
83
  for i in reversed(contours):
84
- x, y, w, h = cv2.boundingRect(i)
85
- if cv2.contourArea(i)<20 :
86
- continue
87
- elif h < 8:
88
- continue
89
- else:
90
- scaling_factor_in_y = 0.5
91
- scaling_factor_in_x = 0
92
- resized_contour = i.copy()
93
-
94
- resized_contour = i * [x_scaling, y_scaling]
95
-
96
- resized_contour = resized_contour.astype(int)
97
- final_image__ = np.zeros_like(binary_image1)
98
- cv2.drawContours(final_image__, [resized_contour], 0, (255), -1)
99
-
100
- kernel_dil = np.ones((3,3),np.uint8)
101
- final_image__ = cv2.dilate(final_image__,kernel_dil,iterations = 3)
102
-
103
-
104
- line_image_final = cv2.bitwise_and(final_image__, binary_image1)
105
- line.append(line_image_final)
106
- # cv2.namedWindow("Line image", cv2.WINDOW_NORMAL)
107
- # cv2.imshow('Line image',line_image_final)
108
- # cv2.waitKey(0)
109
-
110
 
111
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  except Exception as e:
113
- print(f"Error in analise: {str(e)}")
114
-
115
- def image_resize_and_errosion(image):
116
-
117
- height, width = image.shape[:2]
118
- height = height + 1 * height
119
- height = int(height)
120
-
121
- resized_image = cv2.resize(image, (width, height))
122
-
123
- kernel = np.ones((13,1),np.uint8)
124
-
125
- erosion = cv2.erode(resized_image,kernel,iterations = 1)
126
 
127
- return erosion
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
 
130
  x_scaling = 0
@@ -136,166 +129,122 @@ count = 0
136
 
137
  def convert_image(img):
138
  try:
139
- folder_path = 'images'
140
-
141
- for filename in os.listdir(folder_path):
142
- file_path = os.path.join(folder_path, filename)
143
- try:
144
- if os.path.isfile(file_path):
145
- os.remove(file_path)
146
- except Exception as e:
147
- print(f"Error deleting file {file_path}: {e}")
148
-
149
 
 
 
150
 
151
- global x_scaling,y_scaling,binary_image1,line,line_lenght,count
152
- # img = cv2.imread(image_file)
 
 
 
 
 
153
  img_copy = np.copy(img)
154
- line_lenght = 250
155
  rect_image = img
156
 
157
- # removing the shadow in the image
 
158
  image1, image2_ = remove_shadow(rect_image)
159
 
160
- # converting into grayscale
161
- gray_ = cv2.cvtColor(image2_,cv2.COLOR_BGR2GRAY)
 
162
 
163
- # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
164
- # cv2.imshow('grayscale image',gray_)
165
- # cv2.waitKey(0)
166
-
167
- # convrting into binaryimage
168
- _, binary_image_ = cv2.threshold(gray_, 200, 255, cv2.THRESH_BINARY)
169
- # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
170
- # cv2.imshow('binary image',binary_image_)
171
- # cv2.waitKey(0)
172
-
173
  inverted_binary_image_ = 255 - binary_image_
174
 
175
  binary_image1 = np.copy(inverted_binary_image_)
176
 
177
- y_height ,x_width= rect_image.shape[:2]
 
178
 
179
- # print("image width, height =", x_width, y_height)
180
-
181
- # resizing the image
182
  new_width = 500*5
183
  new_height = 705*5
184
 
185
  x_scaling = x_width/new_width
186
  y_scaling = y_height/new_height
187
 
188
- # print("After resizing width, height", new_width , new_height)
189
- rect_image = cv2.resize(rect_image, (new_width, new_height), interpolation=cv2.INTER_NEAREST)
190
- # cv2.namedWindow("resized image", cv2.WINDOW_NORMAL)
191
- # cv2.imshow('resized image',rect_image)
192
- # cv2.waitKey(0)
193
 
194
- # removing the shadow in the image
 
195
  image1, image2 = remove_shadow(rect_image)
196
-
197
- # converting into grayscale
198
- gray = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
199
- # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
200
- # cv2.imshow('grayscale image',gray)
201
- # cv2.waitKey(0)
202
-
203
- # convrting into binaryimage
204
- _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
205
- _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
206
- # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
207
- # cv2.imshow('binary image',gray)
208
- # cv2.waitKey(0)
209
-
210
- # inverting the pixel
211
  inverted_binary_image = 255 - binary_image
212
 
213
- kernel = np.ones((2,2),np.uint8)
214
-
215
-
216
- # performing erosion to remove noise
217
- erosion = cv2.erode(inverted_binary_image,kernel,iterations = 1)
218
- # cv2.namedWindow("erosion", cv2.WINDOW_NORMAL)
219
- # cv2.imshow('erosion',erosion)
220
- # cv2.waitKey(0)
221
-
222
-
223
- # performing Dilution operatiom
224
- dilation = cv2.dilate(erosion,kernel,iterations = 1)
225
- # cv2.namedWindow("dilation", cv2.WINDOW_NORMAL)
226
- # cv2.imshow('dilation',erosion)
227
- # cv2.waitKey(0)
228
 
229
-
230
  new_image = np.copy(dilation)
231
  new_image = 255 - new_image
232
 
233
-
234
- # defining kernal size
235
- kernel = np.ones((1,250),np.uint8)
236
-
237
-
238
- # performing Dilution operatiom
239
- dilation_1 = cv2.dilate(dilation,kernel,iterations = 2)
240
- # cv2.namedWindow("dilation_1", cv2.WINDOW_NORMAL)
241
- # cv2.imshow('dilation_1',dilation_1)
242
- # cv2.waitKey(0)
243
 
244
- contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
245
 
246
  line = []
247
- # line saparation
 
248
  for i in reversed(contours):
249
  x, y, w, h = cv2.boundingRect(i)
250
- if cv2.contourArea(i)<20:
251
- continue
252
- elif h < 10:
253
  continue
254
- else:
255
- cv2.drawContours(new_image, [i],-1,(0),2)
256
- final_image_ = np.zeros_like(binary_image)
257
- cv2.drawContours(final_image_, [i], 0, (255), -1)
258
-
259
- # cv2.namedWindow("final_image_", cv2.WINDOW_NORMAL)
260
- # cv2.imshow('final_image_',final_image_)
261
- # cv2.waitKey(0)
262
-
263
-
264
- line_image = cv2.bitwise_and(final_image_, dilation)
265
- # cv2.namedWindow("line_image", cv2.WINDOW_NORMAL)
266
- # cv2.imshow('line_image',line_image)
267
- # cv2.waitKey(0)
268
-
269
-
270
- analise(line_image)
271
 
 
 
 
 
 
272
 
273
  count = 0
274
- kernel1 = np.ones((8,8),np.uint8)
275
- word__image = [] # newly added
 
 
276
  for line_image in line:
277
-
278
- dilation_2 = cv2.dilate(line_image,kernel1,iterations = 2)
279
-
280
- contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
281
-
282
  sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
283
 
284
  for j in sorted_contours:
285
- x1,y1,w1,h1 = cv2.boundingRect(j)
286
- final_image = line_image[y1:y1+h1,x1:x1+w1]
287
- image_name ="images/"+str(count)+".png"
 
 
288
  final_image = 255 - final_image
289
- word__image.append(final_image)# newly added
290
- # cv2.imwrite(image_name, final_image)
291
- count=count+1
292
 
293
- # cv2.waitKey(0)
294
- # cv2.destroyAllWindows()
295
  return word__image
296
 
297
  except Exception as e:
298
- print(f"Error in convert_image: {str(e)}")
299
  return []
300
 
301
 
 
4
  import sys
5
  import os
6
  import tempfile
7
+ import logging
8
 
9
+ # Set up logging
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
 
13
  cordinates =[]
14
 
 
42
 
43
  return warped
44
  except Exception as e:
45
+ logger.error(f"Error in four_point_transform: {str(e)}")
46
  return image
47
 
48
 
49
  def remove_shadow(image):
50
  try:
51
  rgb_planes = cv2.split(image)
 
52
  result_planes = []
53
  result_norm_planes = []
54
+
55
  for plane in rgb_planes:
56
  dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
57
  bg_img = cv2.medianBlur(dilated_img, 21)
58
  diff_img = 255 - cv2.absdiff(plane, bg_img)
59
+ norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255,
60
+ norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
61
  result_planes.append(diff_img)
62
  result_norm_planes.append(norm_img)
63
 
 
66
 
67
  return result, result_norm
68
  except Exception as e:
69
+ logger.error(f"Error in remove_shadow: {str(e)}")
70
  return image, image
71
 
72
 
73
 
74
 
75
def analise(image, binary_image1, x_scaling, y_scaling):
    """Map the text lines found in ``image`` onto ``binary_image1``.

    ``image`` is dilated with a 1x250 kernel and its external contours
    are extracted. Each contour that passes the size filter has its
    coordinates multiplied by ``[x_scaling, y_scaling]``, is drawn
    filled on a blank canvas shaped like ``binary_image1``, dilated
    slightly, and AND-ed with ``binary_image1``.

    Args:
        image: Single-channel mask to search for contours.
        binary_image1: Single-channel image that supplies the canvas
            shape and the pixels kept inside each contour.
        x_scaling: Factor applied to contour x coordinates.
        y_scaling: Factor applied to contour y coordinates.

    Returns:
        A list with one masked copy of ``binary_image1`` per accepted
        contour, visited in reverse ``findContours`` order. ``[]`` when
        any step raises.
    """
    try:
        line = []
        kernel = np.ones((1, 250), np.uint8)
        dilation = cv2.dilate(image, kernel, iterations=2)

        contours, _ = cv2.findContours(
            dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        kernel_dil = np.ones((3, 3), np.uint8)

        for contour in reversed(contours):
            _, _, _, h = cv2.boundingRect(contour)
            if cv2.contourArea(contour) < 20 or h < 8:
                continue

            # cv2.drawContours only accepts 32-bit integer points.
            # astype(int) yields int64 where NumPy's default integer is
            # 64-bit, which makes drawContours raise and this function
            # return [] for every line.
            resized_contour = (contour * [x_scaling, y_scaling]).astype(np.int32)

            final_image__ = np.zeros_like(binary_image1)
            cv2.drawContours(final_image__, [resized_contour], 0, (255), -1)
            final_image__ = cv2.dilate(final_image__, kernel_dil, iterations=3)

            line.append(cv2.bitwise_and(final_image__, binary_image1))

        return line
    except Exception as e:
        # Best effort: the caller treats [] as "no lines found".
        logger.exception("Error in analise: %s", e)
        return []
 
 
 
 
 
 
 
 
 
 
 
107
 
108
def image_resize_and_erosion(image):
    """Double an image's height, then erode it with a 13x1 kernel.

    Args:
        image: Image array exposing ``shape`` and accepted by
            ``cv2.resize``.

    Returns:
        The eroded, vertically stretched image, or ``image`` unchanged
        when any step raises.
    """
    try:
        rows, cols = image.shape[:2]
        # cv2.resize takes the target size as (width, height).
        stretched = cv2.resize(image, (cols, int(rows * 2)))
        vertical_kernel = np.ones((13, 1), np.uint8)
        return cv2.erode(stretched, vertical_kernel, iterations=1)
    except Exception as e:
        logger.error(f"Error in image_resize_and_erosion: {str(e)}")
        return image
121
 
122
 
123
  x_scaling = 0
 
129
 
130
  def convert_image(img):
131
  try:
132
+ # Create temporary directory for processing
133
+ temp_dir = tempfile.mkdtemp()
134
+ images_dir = os.path.join(temp_dir, 'images')
135
+ os.makedirs(images_dir, exist_ok=True)
 
 
 
 
 
 
136
 
137
+ logger.info("Starting image conversion...")
138
+ logger.info(f"Input image shape: {img.shape}")
139
 
140
+ # Ensure image is in correct format
141
+ if len(img.shape) == 2: # If grayscale
142
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
143
+ elif img.shape[2] == 4: # If RGBA
144
+ img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
145
+
146
+ img = img.astype(np.uint8)
147
  img_copy = np.copy(img)
148
+ line_length = 250
149
  rect_image = img
150
 
151
+ # Remove shadow
152
+ logger.info("Removing shadow...")
153
  image1, image2_ = remove_shadow(rect_image)
154
 
155
+ # Convert to grayscale
156
+ logger.info("Converting to grayscale...")
157
+ gray_ = cv2.cvtColor(image2_, cv2.COLOR_BGR2GRAY)
158
 
159
+ # Convert to binary with adaptive thresholding
160
+ logger.info("Converting to binary...")
161
+ binary_image_ = cv2.adaptiveThreshold(
162
+ gray_, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
163
+ cv2.THRESH_BINARY_INV, 11, 2
164
+ )
 
 
 
 
165
  inverted_binary_image_ = 255 - binary_image_
166
 
167
  binary_image1 = np.copy(inverted_binary_image_)
168
 
169
+ y_height, x_width = rect_image.shape[:2]
170
+ logger.info(f"Image dimensions: {x_width}x{y_height}")
171
 
172
+ # Resize image
 
 
173
  new_width = 500*5
174
  new_height = 705*5
175
 
176
  x_scaling = x_width/new_width
177
  y_scaling = y_height/new_height
178
 
179
+ logger.info("Resizing image...")
180
+ rect_image = cv2.resize(rect_image, (new_width, new_height),
181
+ interpolation=cv2.INTER_NEAREST)
 
 
182
 
183
+ # Process resized image
184
+ logger.info("Processing resized image...")
185
  image1, image2 = remove_shadow(rect_image)
186
+ gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
187
+ binary_image = cv2.adaptiveThreshold(
188
+ gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
189
+ cv2.THRESH_BINARY_INV, 11, 2
190
+ )
 
 
 
 
 
 
 
 
 
 
191
  inverted_binary_image = 255 - binary_image
192
 
193
+ # Apply morphological operations
194
+ kernel = np.ones((2,2), np.uint8)
195
+ erosion = cv2.erode(inverted_binary_image, kernel, iterations=1)
196
+ dilation = cv2.dilate(erosion, kernel, iterations=1)
 
 
 
 
 
 
 
 
 
 
 
197
 
 
198
  new_image = np.copy(dilation)
199
  new_image = 255 - new_image
200
 
201
+ # Find text regions
202
+ kernel = np.ones((1,250), np.uint8)
203
+ dilation_1 = cv2.dilate(dilation, kernel, iterations=2)
 
 
 
 
 
 
 
204
 
205
+ contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL,
206
+ cv2.CHAIN_APPROX_SIMPLE)
207
 
208
  line = []
209
+ logger.info(f"Found {len(contours)} contours")
210
+
211
  for i in reversed(contours):
212
  x, y, w, h = cv2.boundingRect(i)
213
+ if cv2.contourArea(i) < 20 or h < 10:
 
 
214
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
+ cv2.drawContours(new_image, [i], -1, (0), 2)
217
+ final_image_ = np.zeros_like(binary_image)
218
+ cv2.drawContours(final_image_, [i], 0, (255), -1)
219
+ line_image = cv2.bitwise_and(final_image_, dilation)
220
+ line.extend(analise(line_image, binary_image1, x_scaling, y_scaling))
221
 
222
  count = 0
223
+ kernel1 = np.ones((8,8), np.uint8)
224
+ word__image = []
225
+
226
+ logger.info(f"Processing {len(line)} lines")
227
  for line_image in line:
228
+ dilation_2 = cv2.dilate(line_image, kernel1, iterations=2)
229
+ contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL,
230
+ cv2.CHAIN_APPROX_SIMPLE)
 
 
231
  sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
232
 
233
  for j in sorted_contours:
234
+ x1, y1, w1, h1 = cv2.boundingRect(j)
235
+ if w1 < 5 or h1 < 5: # Skip very small regions
236
+ continue
237
+
238
+ final_image = line_image[y1:y1+h1, x1:x1+w1]
239
  final_image = 255 - final_image
240
+ word__image.append(final_image)
241
+ count += 1
 
242
 
243
+ logger.info(f"Extracted {count} words")
 
244
  return word__image
245
 
246
  except Exception as e:
247
+ logger.error(f"Error in convert_image: {str(e)}")
248
  return []
249
 
250
 
main.py CHANGED
@@ -44,14 +44,14 @@ except Exception as e:
44
 
45
  # Set up logging to both file and console
46
  try:
47
- logging.basicConfig(
48
- level=logging.INFO,
49
- format='%(asctime)s - %(levelname)s - %(message)s',
50
- handlers=[
51
- logging.FileHandler(log_file),
52
- logging.StreamHandler(sys.stdout)
53
- ]
54
- )
55
  except Exception as e:
56
  print(f"Warning: Could not set up file logging: {e}")
57
  # Fallback to console-only logging
@@ -107,7 +107,7 @@ for data in required_nltk_data:
107
  try:
108
  log_print(f"Downloading NLTK data: {data}")
109
  nltk.download(data, download_dir=nltk_data_dir)
110
- except Exception as e:
111
  log_print(f"Error downloading NLTK data {data}: {e}", "ERROR")
112
  raise
113
 
@@ -155,11 +155,11 @@ def compute_answers():
155
  return jsonify({"error": "Missing query file"}), 400
156
 
157
  try:
158
- queries = query_file.read().decode('utf-8').splitlines()
159
  if not queries:
160
  log_print("No queries found in file", "ERROR")
161
  return jsonify({"error": "No queries found in file"}), 400
162
- log_print(f"Received queries: {queries}")
163
  except UnicodeDecodeError:
164
  log_print("Invalid file encoding", "ERROR")
165
  return jsonify({"error": "Invalid file encoding"}), 400
@@ -177,12 +177,12 @@ def compute_answers():
177
  return jsonify({"error": "Missing answer CSV file"}), 400
178
 
179
  try:
180
- ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
181
- c_answers = []
182
- for i in ans_csv_file:
183
- c_answers.append(i.split('\\n'))
184
- log_print(f"Processed CSV answers: {c_answers}")
185
- return jsonify({"answers": c_answers}), 200
186
  except UnicodeDecodeError:
187
  log_print("Invalid CSV file encoding", "ERROR")
188
  return jsonify({"error": "Invalid CSV file encoding"}), 400
@@ -253,14 +253,14 @@ def compute_marks():
253
  return jsonify({"error": "No answers provided"}), 400
254
 
255
  try:
256
- log_print("=== Processing Answers ===")
257
- log_print(f"Received answers: {a}")
258
- a = json.loads(a)
259
- answers = []
260
- for i in a:
261
- ans = i.split('\n\n')
262
- answers.append(ans)
263
- log_print(f"Processed answers structure: {answers}")
264
  except json.JSONDecodeError:
265
  log_print("Invalid JSON format in answers", "ERROR")
266
  return jsonify({"error": "Invalid JSON format in answers"}), 400
@@ -378,7 +378,7 @@ def compute_marks():
378
  log_print("\nFinal Results:")
379
  for student, marks_list in s_marks.items():
380
  log_print(f"{student}: {marks_list}")
381
-
382
  return jsonify({"message": s_marks}), 200
383
 
384
  except Exception as e:
 
44
 
45
  # Set up logging to both file and console
46
  try:
47
+ logging.basicConfig(
48
+ level=logging.INFO,
49
+ format='%(asctime)s - %(levelname)s - %(message)s',
50
+ handlers=[
51
+ logging.FileHandler(log_file),
52
+ logging.StreamHandler(sys.stdout)
53
+ ]
54
+ )
55
  except Exception as e:
56
  print(f"Warning: Could not set up file logging: {e}")
57
  # Fallback to console-only logging
 
107
  try:
108
  log_print(f"Downloading NLTK data: {data}")
109
  nltk.download(data, download_dir=nltk_data_dir)
110
+ except Exception as e:
111
  log_print(f"Error downloading NLTK data {data}: {e}", "ERROR")
112
  raise
113
 
 
155
  return jsonify({"error": "Missing query file"}), 400
156
 
157
  try:
158
+ queries = query_file.read().decode('utf-8').splitlines()
159
  if not queries:
160
  log_print("No queries found in file", "ERROR")
161
  return jsonify({"error": "No queries found in file"}), 400
162
+ log_print(f"Received queries: {queries}")
163
  except UnicodeDecodeError:
164
  log_print("Invalid file encoding", "ERROR")
165
  return jsonify({"error": "Invalid file encoding"}), 400
 
177
  return jsonify({"error": "Missing answer CSV file"}), 400
178
 
179
  try:
180
+ ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
181
+ c_answers = []
182
+ for i in ans_csv_file:
183
+ c_answers.append(i.split('\\n'))
184
+ log_print(f"Processed CSV answers: {c_answers}")
185
+ return jsonify({"answers": c_answers}), 200
186
  except UnicodeDecodeError:
187
  log_print("Invalid CSV file encoding", "ERROR")
188
  return jsonify({"error": "Invalid CSV file encoding"}), 400
 
253
  return jsonify({"error": "No answers provided"}), 400
254
 
255
  try:
256
+ log_print("=== Processing Answers ===")
257
+ log_print(f"Received answers: {a}")
258
+ a = json.loads(a)
259
+ answers = []
260
+ for i in a:
261
+ ans = i.split('\n\n')
262
+ answers.append(ans)
263
+ log_print(f"Processed answers structure: {answers}")
264
  except json.JSONDecodeError:
265
  log_print("Invalid JSON format in answers", "ERROR")
266
  return jsonify({"error": "Invalid JSON format in answers"}), 400
 
378
  log_print("\nFinal Results:")
379
  for student, marks_list in s_marks.items():
380
  log_print(f"{student}: {marks_list}")
381
+
382
  return jsonify({"message": s_marks}), 200
383
 
384
  except Exception as e: