yamanavijayavardhan committed
Commit 51c49bc · 1 Parent(s): 92ba605

Initial upload of answer grading application
Dockerfile ADDED
@@ -0,0 +1,10 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . /code
+
+ CMD ["python", "main.py"]
HTR/app.py ADDED
@@ -0,0 +1,23 @@
+ import cv2
+
+ from HTR.word import convert_image
+ from HTR.strike import struck_images
+ from HTR.hcr import text
+ from HTR.spell_and_gramer_check import spell_grammer
+
+ # Define a function to extract text from an image
+ def extract_text_from_image(img_path):
+     img = cv2.imread(img_path)
+     # print(img)
+     imgs = convert_image(img)
+     images_path = struck_images(imgs)
+     t = text(images_path)
+     # print("\n\n\n\n\n\n\n")
+     # print(t)
+     t = spell_grammer(t)
+     # t = text
+     # print("\n\n\n\n\n\n\n")
+     # print(t)
+     return t
+
+ # extract_text_from_image("ans_image/1.jpg")
HTR/hcr.py ADDED
@@ -0,0 +1,27 @@
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+ from PIL import Image
+ import cv2
+
+
+ MODEL_NAME = "microsoft/trocr-large-handwritten"
+ processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
+ model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
+
+
+ def text(image_cv):
+     t = ""
+     for i in image_cv:
+         img_rgb = cv2.cvtColor(i, cv2.COLOR_BGR2RGB)
+         image = Image.fromarray(img_rgb)
+
+         # image = Image.open(i).convert("RGB")
+         pixel_values = processor(image, return_tensors="pt").pixel_values
+         generated_ids = model.generate(pixel_values)
+
+         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+         t = t+generated_text.replace(" ", "")+ " "
+
+     # print(t)
+
+     return t
+
HTR/spell_and_gramer_check.py ADDED
@@ -0,0 +1,39 @@
+ import language_tool_python
+ from spellchecker import SpellChecker
+
+ tool = language_tool_python.LanguageTool('en-US')
+
+ def check_grammar(answer):
+
+     my_matches = tool.check(answer)
+     corrected_text = tool.correct(answer)
+     return corrected_text
+
+ def correct_spelling(text):
+     spell = SpellChecker()
+     words = text.split()
+
+     # Find misspelled words
+     misspelled = spell.unknown(words)
+
+     # Correct misspelled words
+     corrected_text = []
+     for word in words:
+         if word in misspelled:
+             correction = spell.correction(word)
+             # If no correction found or correction is None, keep the original word
+             if correction is None:
+                 corrected_text.append(word)
+             else:
+                 corrected_text.append(correction)
+         else:
+             corrected_text.append(word)
+
+     return " ".join(map(str, corrected_text))
+
+ def spell_grammer(text):
+     spell_check_text = correct_spelling(text)
+
+     corrected_text = check_grammar(spell_check_text)
+
+     return corrected_text
HTR/strike.py ADDED
@@ -0,0 +1,45 @@
+ import pandas as pd
+ import numpy as np
+ import tensorflow as tf
+ import torch
+ import os
+ import cv2
+ from transformers import AutoModelForImageClassification
+
+ def image_preprocessing(image):
+     images=[]
+     for i in image:
+         binary_image = i
+         binary_image = cv2.resize(binary_image, (224, 224))
+         binary_image = cv2.merge([binary_image, binary_image, binary_image])
+         binary_image = binary_image/255
+         binary_image = torch.from_numpy(binary_image)
+         images.append(binary_image)
+     return images
+
+ def predict_image(image_path, model):
+     preprocessed_img = image_preprocessing(image_path)
+     images = torch.stack(preprocessed_img)
+     images = images.permute(0, 3, 1, 2)
+     predictions = model(images).logits.detach().numpy()
+     return predictions
+
+
+ model = AutoModelForImageClassification.from_pretrained("models/vit-base-beans")
+
+ def struck_images(word__image):
+
+
+     predictions = predict_image(word__image, model)
+
+     not_struck =[]
+     for i in range(len(predictions)):
+         if predictions[i].argmax().item() == 0:
+
+             not_struck.append(word__image[i])
+
+     # print(not_struck)
+     return not_struck
+
+
+ # struck_images()
HTR/word.py ADDED
@@ -0,0 +1,288 @@
+ import numpy as np
+ import cv2
+ import matplotlib.pyplot as plt
+ import sys
+ import os
+
+
+ cordinates =[]
+
+
+
+ def four_point_transform(image, pts):
+     rect = pts
+     (tl, tr, br, bl) = rect
+
+     # Compute the width of the new image
+     widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+     widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+     maxWidth = max(int(widthA), int(widthB))
+
+     # Compute the height of the new image
+     heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+     heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+     maxHeight = max(int(heightA), int(heightB))
+
+     dst = np.array([
+         [0, 0],
+         [maxWidth - 1, 0],
+         [maxWidth - 1, maxHeight - 1],
+         [0, maxHeight - 1]], dtype="float32")
+
+     rect = np.array(rect, dtype="float32")
+
+     M = cv2.getPerspectiveTransform(rect, dst)
+     warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
+
+     return warped
+
+
+ def remove_shadow(image):
+     rgb_planes = cv2.split(image)
+
+     result_planes = []
+     result_norm_planes = []
+     for plane in rgb_planes:
+         dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
+         bg_img = cv2.medianBlur(dilated_img, 21)
+         diff_img = 255 - cv2.absdiff(plane, bg_img)
+         norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
+         result_planes.append(diff_img)
+         result_norm_planes.append(norm_img)
+
+     result = cv2.merge(result_planes)
+     result_norm = cv2.merge(result_norm_planes)
+
+     return result,result_norm
+
+
+
+
+ def analise(image):
+     global line, binary_image1, x_scaling , y_scaling
+     kernel = np.ones((1,250),np.uint8)
+
+     dilation = cv2.dilate(image, kernel, iterations = 2)
+
+     # cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('Image',dilation)
+     # cv2.waitKey(0)
+
+     contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+     for i in reversed(contours):
+         x, y, w, h = cv2.boundingRect(i)
+         if cv2.contourArea(i)<20 :
+             continue
+         elif h < 8:
+             continue
+         else:
+             scaling_factor_in_y = 0.5
+             scaling_factor_in_x = 0
+             resized_contour = i.copy()
+
+             resized_contour = i * [x_scaling, y_scaling]
+
+             resized_contour = resized_contour.astype(int)
+             final_image__ = np.zeros_like(binary_image1)
+             cv2.drawContours(final_image__, [resized_contour], 0, (255), -1)
+
+             kernel_dil = np.ones((3,3),np.uint8)
+             final_image__ = cv2.dilate(final_image__,kernel_dil,iterations = 3)
+
+
+             line_image_final = cv2.bitwise_and(final_image__, binary_image1)
+             line.append(line_image_final)
+             # cv2.namedWindow("Line image", cv2.WINDOW_NORMAL)
+             # cv2.imshow('Line image',line_image_final)
+             # cv2.waitKey(0)
+
+
+
+ def image_resize_and_errosion(image):
+
+     height, width = image.shape[:2]
+     height = height + 1 * height
+     height = int(height)
+
+     resized_image = cv2.resize(image, (width, height))
+
+     kernel = np.ones((13,1),np.uint8)
+
+     erosion = cv2.erode(resized_image,kernel,iterations = 1)
+
+     return erosion
+
+
+ x_scaling = 0
+ y_scaling = 0
+ binary_image1 = 0
+ line = 0
+ line_length = 0
+ count = 0
+
+ def convert_image(img):
+     folder_path = 'images'
+
+     for filename in os.listdir(folder_path):
+         file_path = os.path.join(folder_path, filename)
+         try:
+             if os.path.isfile(file_path):
+                 os.remove(file_path)
+         except Exception as e:
+             print(f"Error deleting file {file_path}: {e}")
+
+
+
+     global x_scaling,y_scaling,binary_image1,line,line_length,count
+     # img = cv2.imread(image_file)
+     img_copy = np.copy(img)
+     line_length = 250
+     rect_image = img
+
+     # removing the shadow in the image
+     image1, image2_ = remove_shadow(rect_image)
+
+     # converting into grayscale
+     gray_ = cv2.cvtColor(image2_,cv2.COLOR_BGR2GRAY)
+
+     # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('grayscale image',gray_)
+     # cv2.waitKey(0)
+
+     # converting into binary image
+     _, binary_image_ = cv2.threshold(gray_, 200, 255, cv2.THRESH_BINARY)
+     # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('binary image',binary_image_)
+     # cv2.waitKey(0)
+
+     inverted_binary_image_ = 255 - binary_image_
+
+     binary_image1 = np.copy(inverted_binary_image_)
+
+     y_height ,x_width= rect_image.shape[:2]
+
+     # print("image width, height =", x_width, y_height)
+
+     # resizing the image
+     new_width = 500*5
+     new_height = 705*5
+
+     x_scaling = x_width/new_width
+     y_scaling = y_height/new_height
+
+     # print("After resizing width, height", new_width , new_height)
+     rect_image = cv2.resize(rect_image, (new_width, new_height), interpolation=cv2.INTER_NEAREST)
+     # cv2.namedWindow("resized image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('resized image',rect_image)
+     # cv2.waitKey(0)
+
+     # removing the shadow in the image
+     image1, image2 = remove_shadow(rect_image)
+
+     # converting into grayscale
+     gray = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
+     # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('grayscale image',gray)
+     # cv2.waitKey(0)
+
+     # converting into binary image
+     _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+     _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+     # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('binary image',gray)
+     # cv2.waitKey(0)
+
+     # inverting the pixel
+     inverted_binary_image = 255 - binary_image
+
+     kernel = np.ones((2,2),np.uint8)
+
+
+     # performing erosion to remove noise
+     erosion = cv2.erode(inverted_binary_image,kernel,iterations = 1)
+     # cv2.namedWindow("erosion", cv2.WINDOW_NORMAL)
+     # cv2.imshow('erosion',erosion)
+     # cv2.waitKey(0)
+
+
+     # performing dilation operation
+     dilation = cv2.dilate(erosion,kernel,iterations = 1)
+     # cv2.namedWindow("dilation", cv2.WINDOW_NORMAL)
+     # cv2.imshow('dilation',erosion)
+     # cv2.waitKey(0)
+
+
+     new_image = np.copy(dilation)
+     new_image = 255 - new_image
+
+
+     # defining kernel size
+     kernel = np.ones((1,250),np.uint8)
+
+
+     # performing dilation operation
+     dilation_1 = cv2.dilate(dilation,kernel,iterations = 2)
+     # cv2.namedWindow("dilation_1", cv2.WINDOW_NORMAL)
+     # cv2.imshow('dilation_1',dilation_1)
+     # cv2.waitKey(0)
+
+     contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+     line = []
+     # line separation
+     for i in reversed(contours):
+         x, y, w, h = cv2.boundingRect(i)
+         if cv2.contourArea(i)<20:
+             continue
+         elif h < 10:
+             continue
+         else:
+             cv2.drawContours(new_image, [i],-1,(0),2)
+             final_image_ = np.zeros_like(binary_image)
+             cv2.drawContours(final_image_, [i], 0, (255), -1)
+
+             # cv2.namedWindow("final_image_", cv2.WINDOW_NORMAL)
+             # cv2.imshow('final_image_',final_image_)
+             # cv2.waitKey(0)
+
+
+             line_image = cv2.bitwise_and(final_image_, dilation)
+             # cv2.namedWindow("line_image", cv2.WINDOW_NORMAL)
+             # cv2.imshow('line_image',line_image)
+             # cv2.waitKey(0)
+
+
+             analise(line_image)
+
+
+     count = 0
+     kernel1 = np.ones((8,8),np.uint8)
+     word__image = [] # newly added
+     for line_image in line:
+
+         dilation_2 = cv2.dilate(line_image,kernel1,iterations = 2)
+
+         contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+         sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
+
+         for j in sorted_contours:
+             x1,y1,w1,h1 = cv2.boundingRect(j)
+             final_image = line_image[y1:y1+h1,x1:x1+w1]
+             image_name ="images/"+str(count)+".png"
+             final_image = 255 - final_image
+             word__image.append(final_image)# newly added
+             # cv2.imwrite(image_name, final_image)
+             count=count+1
+
+     # cv2.waitKey(0)
+     # cv2.destroyAllWindows()
+     return word__image
+
+
+
+
+ # img = cv2.imread("ans_image/1.jpg")
+ # convert_image(img)
+
all_models.py ADDED
@@ -0,0 +1,28 @@
+ from sentence_transformers import SentenceTransformer
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ class ModelSingleton:
+     _instance = None
+     _initialized = False
+
+     def __new__(cls):
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
+         return cls._instance
+
+     def __init__(self):
+         if not self._initialized:
+             # Sentence transformer model
+             SENTENCE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+             self.similarity_tokenizer = AutoTokenizer.from_pretrained(SENTENCE_MODEL)
+             self.similarity_model = SentenceTransformer(SENTENCE_MODEL)
+
+             # Flan-T5-xl model only
+             FLAN_MODEL = "google/flan-t5-xl"
+             self.flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_MODEL)
+             self.flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_MODEL)
+
+             self._initialized = True
+
+ # Create a global instance
+ models = ModelSingleton()
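
Every other module reaches these checkpoints through `from all_models import models`, so the MiniLM and Flan-T5 weights are loaded once per process and shared. A minimal usage sketch of the singleton behaviour (illustrative only, not part of the commit):

from all_models import models, ModelSingleton

# __new__ always hands back the first instance, so no second model load happens
assert ModelSingleton() is models
embedding = models.similarity_model.encode("sample sentence", convert_to_tensor=True)
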
correct_answer_generation/answer_generation.py ADDED
@@ -0,0 +1,46 @@
+ import sys
+ import os
+ import torch
+ sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+ from all_models import models
+
+
+ def query_(query, doc):
+     input_text = f"""
+     You are an AI assistant designed to extract relevant information from a document and generate a clear, concise answer.
+
+     Question: {query}
+
+     Provide a *single-paragraph response of 250 words* that summarizes key details, explains the answer logically, and avoids repetition. Ignore irrelevant details like page numbers, author names, and metadata.
+
+     Context:
+     "{doc}"
+
+     Answer:
+     """
+
+     # Move inputs to the same device as the model
+     device = next(models.flan_model.parameters()).device
+     inputs = models.flan_tokenizer(input_text, return_tensors="pt").to(device)
+     input_length = inputs["input_ids"].shape[1]
+     max_tokens = input_length + 180
+
+     with torch.no_grad():
+         outputs = models.flan_model.generate(
+             **inputs,
+             do_sample=True,
+             max_length=max_tokens,
+             min_length=100,
+             early_stopping=True,
+             temperature=0.7,
+             top_k=50,
+             top_p=0.9,
+             repetition_penalty=1.2,
+             num_beams=3
+         )
+
+     answer = models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # print(answer)
+     # answer = extract_answer(answer)
+     return answer
+
correct_answer_generation/answer_generation_database_creation.py ADDED
@@ -0,0 +1,22 @@
+ import os
+
+ from correct_answer_generation.create_database import create_database_main
+ from correct_answer_generation.related_content_creation import doc_creation
+ from correct_answer_generation.answer_generation import query_
+
+ def database_creation(path):
+     create_database_main(path)
+
+ def answer_generation(path,query):
+     # collection_name = os.path.splitext(os.path.basename(path))[0]
+     path = path.replace("/", "_")
+     data = doc_creation(query,path)
+     correct_answers = query_(query,data)
+     return correct_answers
+
+
+ # ans = answer_generation("OperatingSystems","What is the process, and how does it differ from a program?")
+ # # data = doc_creation(q,"OperatingSystems")
+
+
+
correct_answer_generation/create_database.py ADDED
@@ -0,0 +1,81 @@
+ import fitz  # PyMuPDF
+ import re
+ import chromadb
+ import sys
+ import os
+ import uuid
+ sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+ from all_models import models
+
+ def clean_text(text):
+     # Keep only letters, numbers, punctuation, whitespace, and newlines
+     cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,!?;:'\"()\-]", "", text)
+     return cleaned_text
+
+ def extract_text_from_pdf(pdf_path):
+     text = ""
+     with fitz.open(pdf_path) as doc:
+         for page in doc:
+             page_text = page.get_text()
+             cleaned_text = clean_text(page_text)
+             text += cleaned_text
+     return text
+
+ def clean_data(text):
+     cleaned_text = re.sub(r'\n{2,}', '. \n', text)  # Replace multiple newlines with a single newline
+     cleaned_text = re.sub(r' {2,}', '. \n', cleaned_text)  # Replace multiple spaces with a newline
+     return cleaned_text.strip()  # Strip leading/trailing whitespace
+
+ def combine_list(strings):
+     combined_list = []
+     current_combined = ""
+     for string in strings:
+         word_count = len(string.split())
+
+         if len(current_combined.split()) < 20:
+             current_combined += " " + string.strip()  # Adding space before new string
+
+         # If the combined string reaches at least 20 words, add it to the final list
+         if len(current_combined.split()) >= 20:
+             combined_list.append(current_combined)  # Strip to remove leading/trailing whitespace
+             current_combined = ""  # Reset for the next round
+     if current_combined:
+         combined_list.append(current_combined.strip())
+     return combined_list
+
+ def create_databse(data, name):
+     # Initialize the Persistent Client
+     client = chromadb.PersistentClient(path="correct_answer_generation/chroma_db")
+
+     collection_names = client.list_collections()
+     if name in collection_names:
+         client.delete_collection(name)  # Delete the old collection
+
+     # Create a Collection
+     collection = client.create_collection(name)
+
+     # Generate embeddings using the singleton model
+     embeddings = models.similarity_model.encode(data, batch_size=32, convert_to_tensor=True)
+
+     # Create documents and add them to the collection
+     unique_id = [str(uuid.uuid4()) for _ in range(len(embeddings))]
+
+     collection.add(
+         documents=data,
+         ids=unique_id
+     )
+
+ def create_database_main(path):
+     pdf_path = path
+     pdf_text = extract_text_from_pdf(pdf_path)
+     data = clean_data(pdf_text)
+     data = data.split('. \n')
+     for i in range(len(data)):
+         data[i] = re.sub(r' \n', ' ', data[i])
+         data[i] = re.sub(r'\s+', ' ', data[i])
+     data = [text for text in data if len(text) >= 2]
+     data = combine_list(data)
+
+     path = path.replace("/", "_")
+     create_databse(data, path)
+
correct_answer_generation/related_content_creation.py ADDED
@@ -0,0 +1,29 @@
+ import chromadb
+
+
+ def doc_creation(q,collection_name):
+
+     client_ = chromadb.PersistentClient(path="correct_answer_generation/chroma_db")
+
+     # collection = client_.get_collection(name='OperatingSystems')
+     collection = client_.get_collection(name=collection_name)
+
+
+     results = collection.query(
+         query_texts=[q],
+         n_results=7,  # how many results to return
+         include = ['documents']  # newly added
+     )
+
+     data = ""
+     for i in results['documents'][0]:
+         data += " "+i
+         # print(i)
+     return data
+
+ # q = "What is the difference between a process and a program?"
+ # data = doc_creation(q,"OperatingSystems")
+
+
+
+
main.py ADDED
@@ -0,0 +1,217 @@
+ from flask import Flask, request, jsonify, render_template
+ import os
+ import json
+ import torch
+ from werkzeug.utils import secure_filename
+
+ from HTR.app import extract_text_from_image
+
+ from correct_answer_generation.answer_generation_database_creation import database_creation, answer_generation
+
+
+ from similarity_check.tf_idf.tf_idf_score import create_tfidf_values, tfidf_answer_score
+ from similarity_check.semantic_meaning_check.semantic import similarity_model_score, fasttext_similarity,question_vector_sentence,question_vector_word
+ from similarity_check.llm_based_scoring.llm import llm_score
+
+ app = Flask(__name__)
+
+ UPLOAD_FOLDER = 'uploads'
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+ @app.route('/')
+ def index():
+     return render_template('index.html')
+
+
+
+ def new_value(value, old_min, old_max, new_min, new_max):
+     new_value = new_min + ((value - old_min) * (new_max - new_min)) / (old_max - old_min)
+     return new_value
+
+ @app.route('/compute_answers', methods=['POST'])
+ def compute_answers():
+     query_file = request.files.get('query_file')
+     if not query_file:
+         return jsonify({"error": "Missing query file"}), 400
+
+     queries = query_file.read().decode('utf-8').splitlines()
+     # print(queries)
+     file_type = request.form.get('file_type')
+     ans_csv_file = request.files.get('ans_csv_file')
+
+     if file_type == "csv":
+         ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
+         c_answers = []
+         # print(ans_csv_file)
+         for i in ans_csv_file:
+             c_answers.append(i.split('\\n'))
+         # print(c_answers)
+         return jsonify({"answers": c_answers}), 200
+
+
+     try:
+         c_answers = []
+
+         if file_type == "csv":
+             # Process answer CSV file
+             answers = ans_csv_file.read().decode('utf-8').splitlines()
+             # print(answers)
+             # Implement CSV processing logic
+             c_answers = [f"Processed query: {query}" for query in queries]
+
+         elif file_type == 'pdf':
+
+             for query in queries:
+                 folder_path = 'Knowledge_Retriever_pdf'
+
+                 pdf_files = [f"{folder_path}/{file}" for file in os.listdir(folder_path) if file.endswith('.pdf')]
+
+             for i in pdf_files:
+                 database_creation(i)
+
+             for i in queries:
+                 ans = []
+                 for j in pdf_files:
+                     ans.append(answer_generation(j,i))
+                 c_answers.append(ans)
+
+         else:
+             return jsonify({"error": "Unsupported file type"}), 400
+         # print(c_answers)
+         return jsonify({"answers": c_answers}), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+ @app.route('/compute_marks', methods=['POST'])
+ def compute_marks():
+     try:
+         a = request.form.get('answers')
+         a = json.loads(a)
+         answers = []
+         for i in a:
+             ans = i.split('\n\n')
+             answers.append(ans)
+
+         # Create temporary directory for uploaded files
+         temp_folder = os.path.join('uploads', 'temp_answers')
+         os.makedirs(temp_folder, exist_ok=True)
+
+         # Process uploaded files
+         files = request.files.getlist('files[]')
+         data = {}
+
+         for file in files:
+             if file.filename.endswith(('.jpg', '.jpeg', '.png')):
+                 # Get the relative path from the uploaded folder structure
+                 relative_path = file.filename
+
+                 # Extract student folder name (first directory in path)
+                 path_parts = relative_path.split('/')
+                 if len(path_parts) >= 2:
+                     student_folder = path_parts[0]
+
+                     # Create student directory if it doesn't exist
+                     student_path = os.path.join(temp_folder, student_folder)
+                     os.makedirs(student_path, exist_ok=True)
+
+                     # Save the file
+                     save_path = os.path.join(temp_folder, relative_path)
+                     os.makedirs(os.path.dirname(save_path), exist_ok=True)
+                     file.save(save_path)
+
+                     # Store file path in data dictionary
+                     if student_folder in data:
+                         data[student_folder].append(save_path)
+                     else:
+                         data[student_folder] = [save_path]
+
+         # Sort files for each student
+         for student in data:
+             data[student].sort()  # This will sort the file paths alphabetically
+
+         # Rest of your existing marking logic
+         s_marks = {}
+         sen_vec_answers = []
+         word_vec_answers = []
+
+         for i in answers:
+             temp_v = []
+             temp_w = []
+             for j in i:
+                 temp_v.append(question_vector_sentence(j))
+                 temp_w.append(question_vector_word(j))
+             sen_vec_answers.append(temp_v)
+             word_vec_answers.append(temp_w)
+
+         for i in data:
+             s_marks[i] = []
+             count = 0
+             for j in data[i]:
+                 image_path = j
+                 s_answer = extract_text_from_image(image_path)
+                 tf_idf_word_values, max_tfidf = create_tfidf_values(answers[count])
+                 m = marks(s_answer, sen_vec_answers[count], word_vec_answers[count],
+                           tf_idf_word_values, max_tfidf, answers[count])
+                 if isinstance(m, torch.Tensor):
+                     m = m.item()
+                 s_marks[i].append(m)
+                 count += 1
+
+         # Cleanup temporary files
+         import shutil
+         shutil.rmtree(temp_folder)
+
+         return jsonify({"message": s_marks}), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+
+ def marks(answer,sen_vec_answers,word_vec_answers,tf_idf_word_values, max_tfidf,correct_answers):
+     marks = 0
+     marks1 = tfidf_answer_score(answer,tf_idf_word_values,max_tfidf,marks =10)
+
+     if marks1>3:
+         marks += new_value(marks1, old_min = 3, old_max=10, new_min=0, new_max=5)
+     # print("TFIDF Score",float(marks))
+
+     if marks1>2:
+         marks2 = similarity_model_score(sen_vec_answers,answer)
+         a = 0
+         if marks2>0.95:
+             marks += 3
+             a = a+3
+         elif marks2>0.5:
+             marks += new_value(marks2, old_min = 0.5, old_max=0.95, new_min=0, new_max=3)
+             a = a+new_value(marks2, old_min = 0.5, old_max=0.95, new_min=0, new_max=3)
+         # print("sentence-transformers/all-MiniLM-L6-v2 with Cosine Similarity",a)
+
+         marks3 = fasttext_similarity(word_vec_answers,answer)
+         b = 0
+         if marks3>0.9:
+             marks += 2
+             b= b+2
+         elif marks3>0.4:
+             marks += new_value(marks3, old_min = 0.4, old_max=0.9, new_min=0, new_max=2)
+             b=b+new_value(marks3, old_min = 0.4, old_max=0.9, new_min=0, new_max=2)
+         # print("fasttext-wiki-news-subwords-300 with Soft Cosine Similarity",b)
+
+         marks4 = llm_score(correct_answers,answer)
+         for i in range(len(marks4)):
+             marks4[i] = float(marks4[i])
+
+         m = max(marks4)
+         # print("llm score",m/2)
+         marks = marks/2 + m/2
+
+     return marks
+
+
+
+
+
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=7860)
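
The `marks` routine combines the individual similarity scores through `new_value`, which is just a linear rescale from one interval onto another. A small worked sketch of the TF-IDF contribution (the input value 7 is made up for illustration):

# new_value maps `value` from [old_min, old_max] linearly onto [new_min, new_max].
# A TF-IDF score of 7 on the 3-10 band therefore contributes
# 0 + ((7 - 3) * (5 - 0)) / (10 - 3) ≈ 2.86 of the available 5 marks.
def new_value(value, old_min, old_max, new_min, new_max):
    return new_min + ((value - old_min) * (new_max - new_min)) / (old_max - old_min)

print(round(new_value(7, 3, 10, 0, 5), 2))  # 2.86
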
models/vit-base-beans/all_results.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "epoch": 2.2535211267605635,
+     "eval_loss": 0.011280356906354427,
+     "eval_precision": 1.0,
+     "eval_runtime": 120.2175,
+     "eval_samples_per_second": 2.096,
+     "eval_steps_per_second": 0.133,
+     "total_flos": 1.9806952545489715e+17,
+     "train_loss": 0.1075204591266811,
+     "train_runtime": 5558.7629,
+     "train_samples_per_second": 4.08,
+     "train_steps_per_second": 0.255
+ }
models/vit-base-beans/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "_name_or_path": "vitpre",
+   "architectures": [
+     "ViTForImageClassification"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "encoder_stride": 16,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 768,
+   "image_size": 224,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "model_type": "vit",
+   "num_attention_heads": 12,
+   "num_channels": 3,
+   "num_hidden_layers": 12,
+   "patch_size": 16,
+   "problem_type": "single_label_classification",
+   "qkv_bias": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.2"
+ }
models/vit-base-beans/eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 2.2535211267605635,
+     "eval_loss": 0.011280356906354427,
+     "eval_precision": 1.0,
+     "eval_runtime": 120.2175,
+     "eval_samples_per_second": 2.096,
+     "eval_steps_per_second": 0.133
+ }
models/vit-base-beans/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b75581f71443ac2edbb7a6b087a3bb52dd5da0df124ba42b2da04a4c257466f
+ size 343223968
models/vit-base-beans/runs/Jun06_19-03-39_DELL/events.out.tfevents.1717715031.DELL.3528.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3365ccc0e41eb48d34859835f9df2f0ffe01b2e26801fc353474dd62e9b396b
+ size 13489
models/vit-base-beans/runs/Jun06_19-03-39_DELL/events.out.tfevents.1717720711.DELL.3528.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:677f7fa63a4d50f27d37f5ab5aaf576d62ce8317a5e21a2889dc291b79b3e1c4
+ size 412
models/vit-base-beans/train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 2.2535211267605635,
+     "total_flos": 1.9806952545489715e+17,
+     "train_loss": 0.1075204591266811,
+     "train_runtime": 5558.7629,
+     "train_samples_per_second": 4.08,
+     "train_steps_per_second": 0.255
+ }
models/vit-base-beans/trainer_state.json ADDED
@@ -0,0 +1,307 @@
1
+ {
2
+ "best_metric": 0.011280356906354427,
3
+ "best_model_checkpoint": "./vit-base-beans\\checkpoint-130",
4
+ "epoch": 2.2535211267605635,
5
+ "eval_steps": 10,
6
+ "global_step": 160,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.14084507042253522,
13
+ "grad_norm": 3.220585584640503,
14
+ "learning_rate": 0.00019859154929577466,
15
+ "loss": 0.4675,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.14084507042253522,
20
+ "eval_loss": 0.14601898193359375,
21
+ "eval_precision": 0.9767441860465116,
22
+ "eval_runtime": 115.8763,
23
+ "eval_samples_per_second": 2.175,
24
+ "eval_steps_per_second": 0.138,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 0.28169014084507044,
29
+ "grad_norm": 0.3653267025947571,
30
+ "learning_rate": 0.0001971830985915493,
31
+ "loss": 0.2185,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.28169014084507044,
36
+ "eval_loss": 0.18865476548671722,
37
+ "eval_precision": 0.9264705882352942,
38
+ "eval_runtime": 119.7761,
39
+ "eval_samples_per_second": 2.104,
40
+ "eval_steps_per_second": 0.134,
41
+ "step": 20
42
+ },
43
+ {
44
+ "epoch": 0.4225352112676056,
45
+ "grad_norm": 0.9396668672561646,
46
+ "learning_rate": 0.00019577464788732396,
47
+ "loss": 0.1316,
48
+ "step": 30
49
+ },
50
+ {
51
+ "epoch": 0.4225352112676056,
52
+ "eval_loss": 0.061195846647024155,
53
+ "eval_precision": 1.0,
54
+ "eval_runtime": 119.7047,
55
+ "eval_samples_per_second": 2.105,
56
+ "eval_steps_per_second": 0.134,
57
+ "step": 30
58
+ },
59
+ {
60
+ "epoch": 0.5633802816901409,
61
+ "grad_norm": 1.195994257926941,
62
+ "learning_rate": 0.00019436619718309861,
63
+ "loss": 0.081,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 0.5633802816901409,
68
+ "eval_loss": 0.26516667008399963,
69
+ "eval_precision": 1.0,
70
+ "eval_runtime": 119.2044,
71
+ "eval_samples_per_second": 2.114,
72
+ "eval_steps_per_second": 0.134,
73
+ "step": 40
74
+ },
75
+ {
76
+ "epoch": 0.704225352112676,
77
+ "grad_norm": 13.78415584564209,
78
+ "learning_rate": 0.00019295774647887326,
79
+ "loss": 0.0995,
80
+ "step": 50
81
+ },
82
+ {
83
+ "epoch": 0.704225352112676,
84
+ "eval_loss": 0.14177252352237701,
85
+ "eval_precision": 0.968,
86
+ "eval_runtime": 120.7206,
87
+ "eval_samples_per_second": 2.087,
88
+ "eval_steps_per_second": 0.133,
89
+ "step": 50
90
+ },
91
+ {
92
+ "epoch": 0.8450704225352113,
93
+ "grad_norm": 5.696216106414795,
94
+ "learning_rate": 0.0001915492957746479,
95
+ "loss": 0.2069,
96
+ "step": 60
97
+ },
98
+ {
99
+ "epoch": 0.8450704225352113,
100
+ "eval_loss": 0.03753811493515968,
101
+ "eval_precision": 0.984375,
102
+ "eval_runtime": 122.9296,
103
+ "eval_samples_per_second": 2.05,
104
+ "eval_steps_per_second": 0.13,
105
+ "step": 60
106
+ },
107
+ {
108
+ "epoch": 0.9859154929577465,
109
+ "grad_norm": 3.0282273292541504,
110
+ "learning_rate": 0.00019014084507042254,
111
+ "loss": 0.0379,
112
+ "step": 70
113
+ },
114
+ {
115
+ "epoch": 0.9859154929577465,
116
+ "eval_loss": 0.07843092083930969,
117
+ "eval_precision": 0.9541984732824428,
118
+ "eval_runtime": 124.2593,
119
+ "eval_samples_per_second": 2.028,
120
+ "eval_steps_per_second": 0.129,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 1.1267605633802817,
125
+ "grad_norm": 5.544398784637451,
126
+ "learning_rate": 0.0001887323943661972,
127
+ "loss": 0.1098,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 1.1267605633802817,
132
+ "eval_loss": 0.11345015466213226,
133
+ "eval_precision": 0.9264705882352942,
134
+ "eval_runtime": 122.7101,
135
+ "eval_samples_per_second": 2.054,
136
+ "eval_steps_per_second": 0.13,
137
+ "step": 80
138
+ },
139
+ {
140
+ "epoch": 1.267605633802817,
141
+ "grad_norm": 2.5479578971862793,
142
+ "learning_rate": 0.00018732394366197184,
143
+ "loss": 0.0958,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 1.267605633802817,
148
+ "eval_loss": 0.12344790995121002,
149
+ "eval_precision": 0.9130434782608695,
150
+ "eval_runtime": 177.1319,
151
+ "eval_samples_per_second": 1.423,
152
+ "eval_steps_per_second": 0.09,
153
+ "step": 90
154
+ },
155
+ {
156
+ "epoch": 1.408450704225352,
157
+ "grad_norm": 0.5559585690498352,
158
+ "learning_rate": 0.0001859154929577465,
159
+ "loss": 0.0762,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 1.408450704225352,
164
+ "eval_loss": 0.07210251688957214,
165
+ "eval_precision": 1.0,
166
+ "eval_runtime": 124.2559,
167
+ "eval_samples_per_second": 2.028,
168
+ "eval_steps_per_second": 0.129,
169
+ "step": 100
170
+ },
171
+ {
172
+ "epoch": 1.5492957746478875,
173
+ "grad_norm": 0.07416976243257523,
174
+ "learning_rate": 0.00018450704225352114,
175
+ "loss": 0.0088,
176
+ "step": 110
177
+ },
178
+ {
179
+ "epoch": 1.5492957746478875,
180
+ "eval_loss": 0.01765807531774044,
181
+ "eval_precision": 1.0,
182
+ "eval_runtime": 214.9118,
183
+ "eval_samples_per_second": 1.173,
184
+ "eval_steps_per_second": 0.074,
185
+ "step": 110
186
+ },
187
+ {
188
+ "epoch": 1.6901408450704225,
189
+ "grad_norm": 0.0715404525399208,
190
+ "learning_rate": 0.0001830985915492958,
191
+ "loss": 0.0085,
192
+ "step": 120
193
+ },
194
+ {
195
+ "epoch": 1.6901408450704225,
196
+ "eval_loss": 0.14363093674182892,
197
+ "eval_precision": 0.9333333333333333,
198
+ "eval_runtime": 121.1058,
199
+ "eval_samples_per_second": 2.081,
200
+ "eval_steps_per_second": 0.132,
201
+ "step": 120
202
+ },
203
+ {
204
+ "epoch": 1.8309859154929577,
205
+ "grad_norm": 0.03990806266665459,
206
+ "learning_rate": 0.00018169014084507045,
207
+ "loss": 0.0071,
208
+ "step": 130
209
+ },
210
+ {
211
+ "epoch": 1.8309859154929577,
212
+ "eval_loss": 0.011280356906354427,
213
+ "eval_precision": 1.0,
214
+ "eval_runtime": 120.5316,
215
+ "eval_samples_per_second": 2.091,
216
+ "eval_steps_per_second": 0.133,
217
+ "step": 130
218
+ },
219
+ {
220
+ "epoch": 1.971830985915493,
221
+ "grad_norm": 0.03454509377479553,
222
+ "learning_rate": 0.00018028169014084507,
223
+ "loss": 0.0155,
224
+ "step": 140
225
+ },
226
+ {
227
+ "epoch": 1.971830985915493,
228
+ "eval_loss": 0.10382802784442902,
229
+ "eval_precision": 0.9545454545454546,
230
+ "eval_runtime": 121.0415,
231
+ "eval_samples_per_second": 2.082,
232
+ "eval_steps_per_second": 0.132,
233
+ "step": 140
234
+ },
235
+ {
236
+ "epoch": 2.112676056338028,
237
+ "grad_norm": 0.03133228421211243,
238
+ "learning_rate": 0.00017887323943661972,
239
+ "loss": 0.069,
240
+ "step": 150
241
+ },
242
+ {
243
+ "epoch": 2.112676056338028,
244
+ "eval_loss": 0.05120203271508217,
245
+ "eval_precision": 1.0,
246
+ "eval_runtime": 121.432,
247
+ "eval_samples_per_second": 2.075,
248
+ "eval_steps_per_second": 0.132,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 2.2535211267605635,
253
+ "grad_norm": 0.033982835710048676,
254
+ "learning_rate": 0.00017746478873239437,
255
+ "loss": 0.0866,
256
+ "step": 160
257
+ },
258
+ {
259
+ "epoch": 2.2535211267605635,
260
+ "eval_loss": 0.0187800545245409,
261
+ "eval_precision": 0.9921259842519685,
262
+ "eval_runtime": 123.5401,
263
+ "eval_samples_per_second": 2.04,
264
+ "eval_steps_per_second": 0.13,
265
+ "step": 160
266
+ },
267
+ {
268
+ "epoch": 2.2535211267605635,
269
+ "step": 160,
270
+ "total_flos": 1.9806952545489715e+17,
271
+ "train_loss": 0.1075204591266811,
272
+ "train_runtime": 5558.7629,
273
+ "train_samples_per_second": 4.08,
274
+ "train_steps_per_second": 0.255
275
+ }
276
+ ],
277
+ "logging_steps": 10,
278
+ "max_steps": 1420,
279
+ "num_input_tokens_seen": 0,
280
+ "num_train_epochs": 20,
281
+ "save_steps": 10,
282
+ "stateful_callbacks": {
283
+ "EarlyStoppingCallback": {
284
+ "args": {
285
+ "early_stopping_patience": 5,
286
+ "early_stopping_threshold": 0.01
287
+ },
288
+ "attributes": {
289
+ "early_stopping_patience_counter": 0
290
+ }
291
+ },
292
+ "TrainerControl": {
293
+ "args": {
294
+ "should_epoch_stop": false,
295
+ "should_evaluate": false,
296
+ "should_log": false,
297
+ "should_save": true,
298
+ "should_training_stop": true
299
+ },
300
+ "attributes": {}
301
+ }
302
+ },
303
+ "total_flos": 1.9806952545489715e+17,
304
+ "train_batch_size": 16,
305
+ "trial_name": null,
306
+ "trial_params": null
307
+ }
models/vit-base-beans/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e900ee99fbfc723afe73ab030c236d4f1d59bb636d495f08883046cfa74531c4
+ size 5048
similarity_check/llm_based_scoring/llm.py ADDED
@@ -0,0 +1,76 @@
+ import torch
+ import sys
+ import os
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+ from all_models import models
+
+ # Remove these lines since we're using the singleton
+ # MODEL_NAME = "google/flan-t5-xl"
+ # model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+ # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ models.flan_model.to(device)
+
+ def llm_score(correct_answers, answer):
+     score = []
+
+     for correct_answer in correct_answers:
+         print(correct_answer)
+         print(answer)
+         print()
+         print()
+         prompt = (
+             "You are an expert evaluator of answers. Your response must be a *single numeric score (0-10), not a range.*\n\n"
+
+             "The user's answer has been converted from handwriting using OCR, so minor spelling, punctuation, or small word variations may exist. "
+             "Focus on meaning rather than transcription errors.\n\n"
+
+             "### Evaluation Criteria:\n"
+             "- *Correctness (90% weight):* Does the answer accurately convey the meaning of the correct answer?\n"
+             "- *Completeness (10% weight):* Does it cover all key points?\n\n"
+
+             "### Handling OCR Errors:\n"
+             "- Ignore minor spelling/punctuation mistakes that don't affect meaning.\n"
+             "- Penalize only if word substitutions change the meaning.\n\n"
+
+             "### Scoring Guidelines:\n"
+             "- *10:* Fully correct and complete (90-100% accurate).\n"
+             "- *From 9 to 8:* Mostly correct, minor missing details (80-90% accurate).\n"
+             "- *From 7 to 6:* Good but missing some key points (60-80% accurate).\n"
+             "- *From 5 to 4:* Average, with several omissions/errors (40-60% accurate).\n"
+             "- *From 3 to 2:* Poor, major meaning errors (20-40% accurate).\n"
+             "- *From 1 to 0:* Incorrect or irrelevant (less than 20% accurate).\n\n"
+
+             "Compare the answers and assign a *single numeric score (0-10)* based on correctness and completeness.\n\n"
+
+             "Correct answer:\n"
+             f"{correct_answer}\n\n"
+             "User's answer:\n"
+             f"{answer}\n\n"
+             "Final Score (numeric only, strictly between 0 and 10):")
+
+         # Tokenize input prompt
+         inputs = models.flan_tokenizer(prompt, return_tensors="pt").to(device)
+
+         # Generate response
+         with torch.no_grad():
+             outputs = models.flan_model.generate(
+                 **inputs,
+                 max_length=2048,
+                 do_sample=True,
+                 num_return_sequences=1,
+                 num_beams=5,
+                 temperature=0.6,
+                 top_p=0.9,
+                 early_stopping=True,
+                 pad_token_id=models.flan_tokenizer.pad_token_id,
+                 eos_token_id=models.flan_tokenizer.eos_token_id,
+                 bos_token_id=models.flan_tokenizer.bos_token_id,
+             )
+
+         # Decode and print response
+         print(models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True))
+         score.append(models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True))
+
+     return score
similarity_check/semantic_meaning_check/semantic.py ADDED
@@ -0,0 +1,82 @@
+ from sentence_transformers import util
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ from gensim.models import KeyedVectors
+ import numpy as np
+ import nltk
+ from gensim import corpora
+ from gensim.models import FastText
+ from gensim.similarities import SparseTermSimilarityMatrix, WordEmbeddingSimilarityIndex
+ from gensim.downloader import load
+ import sys
+ import os
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+ from all_models import models
+
+ # Keep fasttext as is
+ fasttext = load('fasttext-wiki-news-subwords-300')
+
+ # nltk.download('punkt')
+ # nltk.download('stopwords')
+
+ def question_vector_sentence(correct_answer):
+     return models.similarity_model.encode(correct_answer, convert_to_tensor=True)
+
+ def similarity_model_score(correct_answer_vector, answer):
+     answer_embedding = models.similarity_model.encode(answer, convert_to_tensor=True)
+     cosine_score = float('-inf')
+     for i in correct_answer_vector:
+         cosine_score = max(cosine_score, util.pytorch_cos_sim(i, answer_embedding))
+     return cosine_score
+
+ def preprocess(sentence):
+     # Lowercase and remove punctuation
+     sentence = sentence.lower()
+     # Tokenize
+     words = word_tokenize(sentence)
+     # Remove stop words
+     words = [word for word in words if word not in stopwords.words('english')]
+     return words
+
+ def sentence_to_vec(tokens, model):
+     # Filter words that are in the Word2Vec vocabulary
+     valid_words = [word for word in tokens if word in model]
+
+     # If there are no valid words, return a zero vector
+     if not valid_words:
+         return np.zeros(model.vector_size)
+
+     # Compute the average vector
+     word_vectors = [model[word] for word in valid_words]
+     sentence_vector = np.mean(word_vectors, axis=0)
+
+     return sentence_vector
+
+ def compute_scm(tokens1, tokens2, model):
+     dictionary = corpora.Dictionary([tokens1, tokens2])
+     tokens1 = dictionary.doc2bow(tokens1)
+     tokens2 = dictionary.doc2bow(tokens2)
+     termsim_index = WordEmbeddingSimilarityIndex(model)
+     termsim_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary)
+     similarity = termsim_matrix.inner_product(tokens1, tokens2, normalized=(True, True))
+     return similarity
+
+ def question_vector_word(correct_answer):
+     return preprocess(correct_answer)
+
+ def fasttext_similarity(correct_answer_vector, answer):
+     preprocess_answer = preprocess(answer)
+     soft_cosine = float('-inf')
+
+     for i in correct_answer_vector:
+         soft_cosine = max(compute_scm(i, preprocess_answer, fasttext), soft_cosine)
+
+     return soft_cosine
+
+
+
+
+
+
+
+
similarity_check/tf_idf/tf_idf_score.py ADDED
@@ -0,0 +1,142 @@
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ from nltk.corpus import wordnet
+ from collections import Counter
+ import string
+
+ # Check and download required NLTK packages
+ try:
+     stopwords.words('english')
+ except LookupError:
+     print("Downloading required NLTK data...")
+     nltk.download('stopwords')
+     nltk.download('punkt')
+     nltk.download('wordnet')
+
+ def remove_stopwords(sentence):
+
+     # converting into words
+     words = word_tokenize(sentence)
+
+     # Get the set of English stop words
+     stop_words = set(stopwords.words('english'))
+
+     # Remove stop words from the list of words
+     filtered_words = [word for word in words if word.lower() not in stop_words]
+
+     words = [word.lower() for word in filtered_words if word.isalpha() and len(word)>1]
+
+     return words
+
+ def get_synonyms(word):
+     synonyms = set()
+     for syn in wordnet.synsets(word):
+         for lemma in syn.lemmas():
+             synonyms.add(lemma.name().lower())
+     return synonyms
+
+
+ def process_sentence(words):
+
+     # Find synonyms for each word
+     synonym_map = {}
+     for word in words:
+         synonyms = get_synonyms(word)
+         synonyms.add(word)  # Ensure the word itself is included if no synonyms are found
+         synonym_map[word] = list(synonyms)
+
+     return synonym_map
+
+ def tf(dict1):
+     # print(dict1)
+     no_of_terms_in_document = len(dict1)
+     word_frequency = {}
+     for i in dict1:
+         count = 0
+         for j in dict1:
+             if i in dict1[j]:
+                 count+=1
+         word_frequency[i] = count
+     # print(word_frequency)
+
+     for i in word_frequency:
+         word_frequency[i] = word_frequency[i]/no_of_terms_in_document
+
+     return word_frequency
+
+ def idf(di):
+     no_of_documents = len(di)
+     new_dict = {}
+     for d in range(len(di)):
+         for i in di[d]:
+             if i not in new_dict:
+                 new_dict[i]=set()
+                 new_dict[i].add(d)
+             else:
+                 new_dict[i].add(d)
+
+     r = {}
+     for i in new_dict:
+         r[i]=len(new_dict[i])/no_of_documents
+     return r
+
+ def total_tf_idf_value(tf_idf_word_values,synonyms_words):
+     value = 0
+     for i in synonyms_words:
+         for j in synonyms_words[i]:
+             if j in tf_idf_word_values:
+                 value += tf_idf_word_values[j]
+                 break
+     return value
+
+
+ def create_tfidf_values(correct_answer):
+     correct_answer_words = []
+     for i in correct_answer:
+         correct_answer_words.append(remove_stopwords(i))
+
+     correct_synonyms_words = []
+
+     for i in correct_answer_words:
+         correct_synonyms_words.append(process_sentence(i))
+
+     tf_ = []
+     for i in correct_synonyms_words:
+         tf_.append(tf(i))
+
+
+     idf_values = idf(correct_synonyms_words)
+
+     tf_idf_word_values = {}
+     count = 0
+     for correct_synonyms_word in correct_synonyms_words:
+         for i in correct_synonyms_word:
+             value = tf_[count][i]*idf_values[i]
+             if i in tf_idf_word_values:
+                 tf_idf_word_values[i] = max(tf_idf_word_values[i],value)
+             else:
+                 tf_idf_word_values[i] = value
+         count+=1
+     for i in tf_idf_word_values:
+         tf_idf_word_values[i] = round(tf_idf_word_values[i], 4)
+
+     tfidf_correct_ans = float('inf')
+     for i in correct_synonyms_words:
+         tfidf_correct_ans = min(total_tf_idf_value(tf_idf_word_values,i),tfidf_correct_ans)
+
+     return tf_idf_word_values,tfidf_correct_ans
+
+
+ def tfidf_answer_score(answer,tf_idf_word_values,max_tfidf,marks=10):
+     answer = remove_stopwords(answer)
+     answer_synonyms_words = process_sentence(answer)
+     value = total_tf_idf_value(tf_idf_word_values,answer_synonyms_words)
+     # print("tfidf value of answer: ",value, " , " "minimum tfidf value of correct answer: " ,max_tfidf)
+     score = (value/max_tfidf)*marks
+     # print(score)
+     if score>10:
+         return 10
+     else:
+         return score
+
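
Here `create_tfidf_values` builds synonym-expanded TF-IDF weights from the reference answers, and `tfidf_answer_score` rates a student answer against them on a 0-10 scale (capped at 10). A small usage sketch with made-up reference strings (requires the NLTK data downloaded above):

from similarity_check.tf_idf.tf_idf_score import create_tfidf_values, tfidf_answer_score

# Hypothetical reference answers for one question
reference_answers = [
    "A process is a program in execution with its own address space.",
    "A program is a passive set of instructions stored on disk.",
]

weights, min_reference_tfidf = create_tfidf_values(reference_answers)
score = tfidf_answer_score("A process is a running program", weights, min_reference_tfidf, marks=10)
print(round(score, 2))  # a value between 0 and 10
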
templates/index.html ADDED
@@ -0,0 +1,367 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Answer Generation</title>
7
+ <!-- Add Google Fonts -->
8
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600&display=swap" rel="stylesheet">
9
+ <style>
10
+ :root {
11
+ --primary-color: #4361ee;
12
+ --secondary-color: #3f37c9;
13
+ --accent-color: #4895ef;
14
+ --background-color: #f8f9fa;
15
+ --text-color: #2b2d42;
16
+ --border-radius: 8px;
17
+ --box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
18
+ }
19
+
20
+ body {
21
+ font-family: 'Poppins', sans-serif;
22
+ margin: 0;
23
+ padding: 2rem;
24
+ background-color: var(--background-color);
25
+ color: var(--text-color);
26
+ line-height: 1.6;
27
+ }
28
+
29
+ .container {
30
+ max-width: 1200px;
31
+ margin: 0 auto;
32
+ padding: 2rem;
33
+ background: white;
34
+ border-radius: var(--border-radius);
35
+ box-shadow: var(--box-shadow);
36
+ }
37
+
38
+ h2 {
39
+ color: var(--primary-color);
40
+ margin-bottom: 1.5rem;
41
+ font-weight: 600;
42
+ position: relative;
43
+ padding-bottom: 0.5rem;
44
+ }
45
+
46
+ h2::after {
47
+ content: '';
48
+ position: absolute;
49
+ bottom: 0;
50
+ left: 0;
51
+ width: 50px;
52
+ height: 3px;
53
+ background-color: var(--accent-color);
54
+ border-radius: 2px;
55
+ }
56
+
57
+ .section {
58
+ background: white;
59
+ padding: 1.5rem;
60
+ border-radius: var(--border-radius);
61
+ margin-bottom: 2rem;
62
+ box-shadow: var(--box-shadow);
63
+ }
64
+
65
+ .upload-container {
66
+ margin-bottom: 1.5rem;
67
+ }
68
+
69
+ label {
70
+ display: block;
71
+ margin-bottom: 0.5rem;
72
+ font-weight: 500;
73
+ color: var(--text-color);
74
+ }
75
+
76
+ input[type="file"] {
77
+ width: 100%;
78
+ padding: 0.5rem;
79
+ margin-bottom: 1rem;
80
+ border: 2px dashed var(--accent-color);
81
+ border-radius: var(--border-radius);
82
+ background: #f8f9fa;
83
+ cursor: pointer;
84
+ }
85
+
86
+ input[type="file"]:hover {
87
+ border-color: var(--primary-color);
88
+ }
89
+
90
+ select {
91
+ width: 100%;
92
+ padding: 0.8rem;
93
+ border: 1px solid #ddd;
94
+ border-radius: var(--border-radius);
95
+ margin-bottom: 1rem;
96
+ font-family: 'Poppins', sans-serif;
97
+ appearance: none;
98
+ background: white url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='%23444' viewBox='0 0 16 16'%3E%3Cpath d='M8 12L2 6h12z'/%3E%3C/svg%3E") no-repeat right 0.8rem center;
99
+ }
100
+
101
+ button {
102
+ background-color: var(--primary-color);
103
+ color: white;
104
+ border: none;
105
+ padding: 0.8rem 1.5rem;
106
+ border-radius: var(--border-radius);
107
+ cursor: pointer;
108
+ font-weight: 500;
109
+ transition: all 0.3s ease;
110
+ font-family: 'Poppins', sans-serif;
111
+ width: 100%;
112
+ margin-bottom: 1rem;
113
+ }
114
+
115
+ button:hover {
116
+ background-color: var(--secondary-color);
117
+ transform: translateY(-2px);
118
+ box-shadow: 0 4px 12px rgba(67, 97, 238, 0.3);
119
+ }
120
+
121
+ .answer-box {
122
+ width: 100%;
123
+ min-height: 100px;
124
+ padding: 1rem;
125
+ margin-bottom: 1rem;
126
+ border: 1px solid #ddd;
127
+ border-radius: var(--border-radius);
128
+ font-family: 'Poppins', sans-serif;
129
+ resize: vertical;
130
+ transition: border-color 0.3s ease;
131
+ }
132
+
133
+ .answer-box:focus {
134
+ outline: none;
135
+ border-color: var(--accent-color);
136
+ box-shadow: 0 0 0 3px rgba(72, 149, 239, 0.2);
137
+ }
138
+
139
+ table {
140
+ width: 100%;
141
+ border-collapse: separate;
142
+ border-spacing: 0;
143
+ margin-top: 1.5rem;
144
+ background: white;
145
+ border-radius: var(--border-radius);
146
+ overflow: hidden;
147
+ box-shadow: var(--box-shadow);
148
+ }
149
+
150
+ th, td {
151
+ padding: 1rem;
152
+ text-align: left;
153
+ border-bottom: 1px solid #eee;
154
+ }
155
+
156
+ th {
157
+ background-color: var(--primary-color);
158
+ color: white;
159
+ font-weight: 500;
160
+ }
161
+
162
+ tr:hover {
163
+ background-color: #f8f9fa;
164
+ }
165
+
166
+ .hidden {
167
+ display: none;
168
+ }
169
+
170
+ /* Responsive Design */
171
+ @media (max-width: 768px) {
172
+ body {
173
+ padding: 1rem;
174
+ }
175
+
176
+ .container {
177
+ padding: 1rem;
178
+ }
179
+
180
+ button {
181
+ padding: 0.7rem 1rem;
182
+ }
183
+ }
184
+
185
+ /* Animation */
186
+ @keyframes fadeIn {
187
+ from { opacity: 0; transform: translateY(10px); }
188
+ to { opacity: 1; transform: translateY(0); }
189
+ }
190
+
191
+ .section {
192
+ animation: fadeIn 0.5s ease-out;
193
+ }
194
+ </style>
195
+ </head>
196
+ <body>
197
+ <div class="container">
198
+ <div class="section">
199
+ <h2>Upload Query CSV File</h2>
200
+ <div id="query-upload">
201
+ <label for="query-file">Query File:</label>
202
+ <div class="upload-container">
203
+ <input type="file" id="query-file" accept=".csv">
204
+ </div>
205
+ </div>
206
+ </div>
207
+
208
+ <div class="section">
209
+ <h2>Answer Generation</h2>
210
+ <label for="file-type">Select File Type:</label>
211
+ <select id="file-type" onchange="handleFileTypeChange()">
212
+ <option value="pdf">PDF</option>
213
+ <option value="csv">CSV</option>
214
+ </select>
215
+
216
+ <div id="csv-upload" class="hidden">
217
+ <label for="csv-file">Upload Answer CSV File:</label>
218
+ <div class="upload-container">
219
+ <input type="file" id="csv-file" accept=".csv">
220
+ </div>
221
+ </div>
222
+
223
+ <button id="compute-btn" onclick="computeAnswers()">Compute Answers</button>
224
+ </div>
225
+
226
+ <div class="section">
227
+ <h2>Student Answers Upload</h2>
228
+ <label for="folder-upload">Upload Student Answers Folder:</label>
229
+ <div class="upload-container">
230
+ <input type="file" id="folder-upload" webkitdirectory directory multiple>
231
+ <small class="help-text">Select the folder containing student answer images</small>
232
+ </div>
233
+ </div>
234
+
235
+ <div class="section">
236
+ <div id="answers-container"></div>
237
+ <button id="compute-marks-btn" onclick="computeMarks()">Compute Marks</button>
238
+ <div id="marks-table-container"></div>
239
+ </div>
240
+ </div>
241
+
242
+ <script>
243
+ function handleFileTypeChange() {
244
+ const fileType = document.getElementById('file-type').value;
245
+ const csvUpload = document.getElementById('csv-upload');
246
+ if (fileType === 'csv') {
247
+ csvUpload.classList.remove('hidden'); // Show the CSV upload section
248
+ } else {
249
+ csvUpload.classList.add('hidden'); // Hide the CSV upload section
250
+ }
251
+ }
252
+
253
+ async function computeAnswers() {
254
+ try {
255
+ const fileType = document.getElementById('file-type').value;
256
+ const queryfile = document.getElementById('query-file').files[0];
257
+ const anscsvFile = document.getElementById('csv-file').files[0];
258
+
259
+ const formData = new FormData();
260
+ formData.append('file_type', fileType);
261
+ formData.append('query_file', queryfile);
262
+ if (anscsvFile) {
263
+ formData.append('ans_csv_file', anscsvFile);
264
+ }
265
+
266
+ const response = await fetch('/compute_answers', { method: 'POST', body: formData });
267
+ const result = await response.json();
268
+ if (result.answers) {
269
+ displayAnswers(result.answers);
270
+ } else {
271
+ console.error('No answers received:', result);
272
+ }
273
+ } catch (error) {
274
+ console.error('Error:', error);
275
+ }
276
+ }
277
+
278
+
279
+ function displayAnswers(answers) {
280
+ const container = document.getElementById('answers-container');
281
+ container.innerHTML = ''; // Clear previous answers
282
+
283
+ answers.forEach(answer => {
284
+ const textBox = document.createElement('textarea');
285
+ textBox.className = 'answer-box';
286
+ textBox.value = answer.join('\n\n'); // Set the answer as the value of the text box
287
+ container.appendChild(textBox);
288
+ });
289
+ }
290
+
291
+ async function computeMarks() {
292
+ try {
293
+ const answerBoxes = document.querySelectorAll('.answer-box');
294
+ const answers = answerBoxes.length === 1 ? [answerBoxes[0].value.trim()] : Array.from(answerBoxes).map(box => box.value.trim());
295
+
296
+ // Create FormData and append answers
297
+ const formData = new FormData();
298
+ formData.append('answers', JSON.stringify(answers));
299
+
300
+ // Handle folder upload
301
+ const folderInput = document.getElementById('folder-upload');
302
+ const files = folderInput.files;
303
+
304
+ // Append each file with its relative path
305
+ for (let i = 0; i < files.length; i++) {
306
+ const file = files[i];
307
+ const relativePath = file.webkitRelativePath;
308
+ formData.append('files[]', file, relativePath);
309
+ }
310
+
311
+ const response = await fetch('/compute_marks', {
312
+ method: 'POST',
313
+ body: formData
314
+ });
315
+ const result = await response.json();
316
+
317
+ if (result) {
318
+ displayMarksTable(result);
319
+ } else {
320
+ console.error('No marks data received:', result);
321
+ }
322
+ } catch (error) {
323
+ console.error('Error:', error);
324
+ }
325
+ }
326
+
327
+ function displayMarksTable(data) {
328
+ // Access the 'message' property which contains the actual marks data
329
+ const marksData = data.message;
330
+
331
+ console.log('Marks Data Received:', marksData);
332
+
333
+ const container = document.getElementById('marks-table-container');
334
+ container.innerHTML = ''; // Clear previous table
335
+
336
+ // Create table
337
+ const table = document.createElement('table');
338
+ table.innerHTML = `
339
+ <tr>
340
+ <th>Name</th>
341
+ <th>Question Number</th>
342
+ <th>Marks</th>
343
+ </tr>
344
+ `;
345
+
346
+ for (const [name, marks] of Object.entries(marksData)) {
347
+ if (!Array.isArray(marks)) {
348
+ console.error('Invalid marks for ${name}:', marks);
349
+ continue; // Skip invalid entries
350
+ }
351
+
352
+ marks.forEach((mark, index) => {
353
+ const row = document.createElement('tr');
354
+ row.innerHTML = `
355
+ <td>${name}</td>
356
+ <td>${index + 1}</td>
357
+ <td>${mark.toFixed(2)}</td>
358
+ `;
359
+ table.appendChild(row);
360
+ });
361
+ }
362
+
363
+ container.appendChild(table);
364
+ }
365
+ </script>
366
+ </body>
367
+ </html>