Commit 51c49bc
Parent(s): 92ba605
Initial upload of answer grading application

Files changed:
- Dockerfile +10 -0
- HTR/app.py +23 -0
- HTR/hcr.py +27 -0
- HTR/spell_and_gramer_check.py +39 -0
- HTR/strike.py +45 -0
- HTR/word.py +288 -0
- all_models.py +28 -0
- correct_answer_generation/answer_generation.py +46 -0
- correct_answer_generation/answer_generation_database_creation.py +22 -0
- correct_answer_generation/create_database.py +81 -0
- correct_answer_generation/related_content_creation.py +29 -0
- main.py +217 -0
- models/vit-base-beans/all_results.json +13 -0
- models/vit-base-beans/config.json +24 -0
- models/vit-base-beans/eval_results.json +8 -0
- models/vit-base-beans/model.safetensors +3 -0
- models/vit-base-beans/runs/Jun06_19-03-39_DELL/events.out.tfevents.1717715031.DELL.3528.0 +3 -0
- models/vit-base-beans/runs/Jun06_19-03-39_DELL/events.out.tfevents.1717720711.DELL.3528.1 +3 -0
- models/vit-base-beans/train_results.json +8 -0
- models/vit-base-beans/trainer_state.json +307 -0
- models/vit-base-beans/training_args.bin +3 -0
- similarity_check/llm_based_scoring/llm.py +76 -0
- similarity_check/semantic_meaning_check/semantic.py +82 -0
- similarity_check/tf_idf/tf_idf_score.py +142 -0
- templates/index.html +367 -0
Dockerfile
ADDED
@@ -0,0 +1,10 @@
+FROM python:3.9
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . /code
+
+CMD ["python", "main.py"]
HTR/app.py
ADDED
@@ -0,0 +1,23 @@
+import cv2
+
+from HTR.word import convert_image
+from HTR.strike import struck_images
+from HTR.hcr import text
+from HTR.spell_and_gramer_check import spell_grammer
+
+# Define a function to extract text from an image
+def extract_text_from_image(img_path):
+    img = cv2.imread(img_path)
+    # print(img)
+    imgs = convert_image(img)
+    images_path = struck_images(imgs)
+    t = text(images_path)
+    # print("\n\n\n\n\n\n\n")
+    # print(t)
+    t = spell_grammer(t)
+    # t = text
+    # print("\n\n\n\n\n\n\n")
+    # print(t)
+    return t
+
+# extract_text_from_image("ans_image/1.jpg")
HTR/hcr.py
ADDED
@@ -0,0 +1,27 @@
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+from PIL import Image
+import cv2
+
+
+MODEL_NAME = "microsoft/trocr-large-handwritten"
+processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
+model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
+
+
+def text(image_cv):
+    t = ""
+    for i in image_cv:
+        img_rgb = cv2.cvtColor(i, cv2.COLOR_BGR2RGB)
+        image = Image.fromarray(img_rgb)
+
+        # image = Image.open(i).convert("RGB")
+        pixel_values = processor(image, return_tensors="pt").pixel_values
+        generated_ids = model.generate(pixel_values)
+
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        t = t + generated_text.replace(" ", "") + " "
+
+        # print(t)
+
+    return t
+
HTR/spell_and_gramer_check.py
ADDED
@@ -0,0 +1,39 @@
+import language_tool_python
+from spellchecker import SpellChecker
+
+tool = language_tool_python.LanguageTool('en-US')
+
+def check_grammar(answer):
+
+    my_matches = tool.check(answer)
+    corrected_text = tool.correct(answer)
+    return corrected_text
+
+def correct_spelling(text):
+    spell = SpellChecker()
+    words = text.split()
+
+    # Find misspelled words
+    misspelled = spell.unknown(words)
+
+    # Correct misspelled words
+    corrected_text = []
+    for word in words:
+        if word in misspelled:
+            correction = spell.correction(word)
+            # If no correction found or correction is None, keep the original word
+            if correction is None:
+                corrected_text.append(word)
+            else:
+                corrected_text.append(correction)
+        else:
+            corrected_text.append(word)
+
+    return " ".join(map(str, corrected_text))
+
+def spell_grammer(text):
+    spell_check_text = correct_spelling(text)
+
+    corrected_text = check_grammar(spell_check_text)
+
+    return corrected_text
HTR/strike.py
ADDED
@@ -0,0 +1,45 @@
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+import torch
+import os
+import cv2
+from transformers import AutoModelForImageClassification
+
+def image_preprocessing(image):
+    images = []
+    for i in image:
+        binary_image = i
+        binary_image = cv2.resize(binary_image, (224, 224))
+        binary_image = cv2.merge([binary_image, binary_image, binary_image])
+        binary_image = binary_image/255
+        binary_image = torch.from_numpy(binary_image)
+        images.append(binary_image)
+    return images
+
+def predict_image(image_path, model):
+    preprocessed_img = image_preprocessing(image_path)
+    images = torch.stack(preprocessed_img)
+    images = images.permute(0, 3, 1, 2)
+    predictions = model(images).logits.detach().numpy()
+    return predictions
+
+
+model = AutoModelForImageClassification.from_pretrained("models/vit-base-beans")
+
+def struck_images(word__image):
+
+
+    predictions = predict_image(word__image, model)
+
+    not_struck = []
+    for i in range(len(predictions)):
+        if predictions[i].argmax().item() == 0:
+
+            not_struck.append(word__image[i])
+
+    # print(not_struck)
+    return not_struck
+
+
+# struck_images()
HTR/word.py
ADDED
@@ -0,0 +1,288 @@
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+import sys
+import os
+
+
+cordinates = []
+
+
+
+def four_point_transform(image, pts):
+    rect = pts
+    (tl, tr, br, bl) = rect
+
+    # Compute the width of the new image
+    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+    maxWidth = max(int(widthA), int(widthB))
+
+    # Compute the height of the new image
+    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+    maxHeight = max(int(heightA), int(heightB))
+
+    dst = np.array([
+        [0, 0],
+        [maxWidth - 1, 0],
+        [maxWidth - 1, maxHeight - 1],
+        [0, maxHeight - 1]], dtype="float32")
+
+    rect = np.array(rect, dtype="float32")
+
+    M = cv2.getPerspectiveTransform(rect, dst)
+    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
+
+    return warped
+
+
+def remove_shadow(image):
+    rgb_planes = cv2.split(image)
+
+    result_planes = []
+    result_norm_planes = []
+    for plane in rgb_planes:
+        dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
+        bg_img = cv2.medianBlur(dilated_img, 21)
+        diff_img = 255 - cv2.absdiff(plane, bg_img)
+        norm_img = cv2.normalize(diff_img, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
+        result_planes.append(diff_img)
+        result_norm_planes.append(norm_img)
+
+    result = cv2.merge(result_planes)
+    result_norm = cv2.merge(result_norm_planes)
+
+    return result, result_norm
+
+
+
+
+def analise(image):
+    global line, binary_image1, x_scaling, y_scaling
+    kernel = np.ones((1,250), np.uint8)
+
+    dilation = cv2.dilate(image, kernel, iterations = 2)
+
+    # cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
+    # cv2.imshow('Image',dilation)
+    # cv2.waitKey(0)
+
+    contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    for i in reversed(contours):
+        x, y, w, h = cv2.boundingRect(i)
+        if cv2.contourArea(i) < 20:
+            continue
+        elif h < 8:
+            continue
+        else:
+            scaling_factor_in_y = 0.5
+            scaling_factor_in_x = 0
+            resized_contour = i.copy()
+
+            resized_contour = i * [x_scaling, y_scaling]
+
+            resized_contour = resized_contour.astype(int)
+            final_image__ = np.zeros_like(binary_image1)
+            cv2.drawContours(final_image__, [resized_contour], 0, (255), -1)
+
+            kernel_dil = np.ones((3,3), np.uint8)
+            final_image__ = cv2.dilate(final_image__, kernel_dil, iterations = 3)
+
+
+            line_image_final = cv2.bitwise_and(final_image__, binary_image1)
+            line.append(line_image_final)
+            # cv2.namedWindow("Line image", cv2.WINDOW_NORMAL)
+            # cv2.imshow('Line image',line_image_final)
+            # cv2.waitKey(0)
+
+
+
+def image_resize_and_errosion(image):
+
+    height, width = image.shape[:2]
+    height = height + 1 * height
+    height = int(height)
+
+    resized_image = cv2.resize(image, (width, height))
+
+    kernel = np.ones((13,1), np.uint8)
+
+    erosion = cv2.erode(resized_image, kernel, iterations = 1)
+
+    return erosion
+
+
+x_scaling = 0
+y_scaling = 0
+binary_image1 = 0
+line = 0
+line_length = 0
+count = 0
+
+def convert_image(img):
+    folder_path = 'images'
+
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+        try:
+            if os.path.isfile(file_path):
+                os.remove(file_path)
+        except Exception as e:
+            print(f"Error deleting file {file_path}: {e}")
+
+
+
+    global x_scaling, y_scaling, binary_image1, line, line_lenght, count
+    # img = cv2.imread(image_file)
+    img_copy = np.copy(img)
+    line_lenght = 250
+    rect_image = img
+
+    # removing the shadow in the image
+    image1, image2_ = remove_shadow(rect_image)
+
+    # converting into grayscale
+    gray_ = cv2.cvtColor(image2_, cv2.COLOR_BGR2GRAY)
+
+    # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
+    # cv2.imshow('grayscale image',gray_)
+    # cv2.waitKey(0)
+
+    # converting into binary image
+    _, binary_image_ = cv2.threshold(gray_, 200, 255, cv2.THRESH_BINARY)
+    # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
+    # cv2.imshow('binary image',binary_image_)
+    # cv2.waitKey(0)
+
+    inverted_binary_image_ = 255 - binary_image_
+
+    binary_image1 = np.copy(inverted_binary_image_)
+
+    y_height, x_width = rect_image.shape[:2]
+
+    # print("image width, height =", x_width, y_height)
+
+    # resizing the image
+    new_width = 500*5
+    new_height = 705*5
+
+    x_scaling = x_width/new_width
+    y_scaling = y_height/new_height
+
+    # print("After resizing width, height", new_width , new_height)
+    rect_image = cv2.resize(rect_image, (new_width, new_height), interpolation=cv2.INTER_NEAREST)
+    # cv2.namedWindow("resized image", cv2.WINDOW_NORMAL)
+    # cv2.imshow('resized image',rect_image)
+    # cv2.waitKey(0)
+
+    # removing the shadow in the image
+    image1, image2 = remove_shadow(rect_image)
+
+    # converting into grayscale
+    gray = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
+    # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
+    # cv2.imshow('grayscale image',gray)
+    # cv2.waitKey(0)
+
+    # converting into binary image
+    _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+    _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+    # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
+    # cv2.imshow('binary image',gray)
+    # cv2.waitKey(0)
+
+    # inverting the pixel
+    inverted_binary_image = 255 - binary_image
+
+    kernel = np.ones((2,2), np.uint8)
+
+
+    # performing erosion to remove noise
+    erosion = cv2.erode(inverted_binary_image, kernel, iterations = 1)
+    # cv2.namedWindow("erosion", cv2.WINDOW_NORMAL)
+    # cv2.imshow('erosion',erosion)
+    # cv2.waitKey(0)
+
+
+    # performing dilation operation
+    dilation = cv2.dilate(erosion, kernel, iterations = 1)
+    # cv2.namedWindow("dilation", cv2.WINDOW_NORMAL)
+    # cv2.imshow('dilation',erosion)
+    # cv2.waitKey(0)
+
+
+    new_image = np.copy(dilation)
+    new_image = 255 - new_image
+
+
+    # defining kernel size
+    kernel = np.ones((1,250), np.uint8)
+
+
+    # performing dilation operation
+    dilation_1 = cv2.dilate(dilation, kernel, iterations = 2)
+    # cv2.namedWindow("dilation_1", cv2.WINDOW_NORMAL)
+    # cv2.imshow('dilation_1',dilation_1)
+    # cv2.waitKey(0)
+
+    contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+    line = []
+    # line separation
+    for i in reversed(contours):
+        x, y, w, h = cv2.boundingRect(i)
+        if cv2.contourArea(i) < 20:
+            continue
+        elif h < 10:
+            continue
+        else:
+            cv2.drawContours(new_image, [i], -1, (0), 2)
+            final_image_ = np.zeros_like(binary_image)
+            cv2.drawContours(final_image_, [i], 0, (255), -1)
+
+            # cv2.namedWindow("final_image_", cv2.WINDOW_NORMAL)
+            # cv2.imshow('final_image_',final_image_)
+            # cv2.waitKey(0)
+
+
+            line_image = cv2.bitwise_and(final_image_, dilation)
+            # cv2.namedWindow("line_image", cv2.WINDOW_NORMAL)
+            # cv2.imshow('line_image',line_image)
+            # cv2.waitKey(0)
+
+
+            analise(line_image)
+
+
+    count = 0
+    kernel1 = np.ones((8,8), np.uint8)
+    word__image = []  # newly added
+    for line_image in line:
+
+        dilation_2 = cv2.dilate(line_image, kernel1, iterations = 2)
+
+        contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
+
+        for j in sorted_contours:
+            x1, y1, w1, h1 = cv2.boundingRect(j)
+            final_image = line_image[y1:y1+h1, x1:x1+w1]
+            image_name = "images/" + str(count) + ".png"
+            final_image = 255 - final_image
+            word__image.append(final_image)  # newly added
+            # cv2.imwrite(image_name, final_image)
+            count = count + 1
+
+    # cv2.waitKey(0)
+    # cv2.destroyAllWindows()
+    return word__image
+
+
+
+
+# img = cv2.imread("ans_image/1.jpg")
+# convert_image(img)
+
all_models.py
ADDED
@@ -0,0 +1,28 @@
+from sentence_transformers import SentenceTransformer
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+class ModelSingleton:
+    _instance = None
+    _initialized = False
+
+    def __new__(cls):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self):
+        if not self._initialized:
+            # Sentence transformer model
+            SENTENCE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+            self.similarity_tokenizer = AutoTokenizer.from_pretrained(SENTENCE_MODEL)
+            self.similarity_model = SentenceTransformer(SENTENCE_MODEL)
+
+            # Flan-T5-xl model only
+            FLAN_MODEL = "google/flan-t5-xl"
+            self.flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_MODEL)
+            self.flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_MODEL)
+
+            self._initialized = True
+
+# Create a global instance
+models = ModelSingleton()
correct_answer_generation/answer_generation.py
ADDED
@@ -0,0 +1,46 @@
+import sys
+import os
+import torch
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+from all_models import models
+
+
+def query_(query, doc):
+    input_text = f"""
+You are an AI assistant designed to extract relevant information from a document and generate a clear, concise answer.
+
+Question: {query}
+
+Provide a *single-paragraph response of 250 words* that summarizes key details, explains the answer logically, and avoids repetition. Ignore irrelevant details like page numbers, author names, and metadata.
+
+Context:
+"{doc}"
+
+Answer:
+"""
+
+    # Move inputs to the same device as the model
+    device = next(models.flan_model.parameters()).device
+    inputs = models.flan_tokenizer(input_text, return_tensors="pt").to(device)
+    input_length = inputs["input_ids"].shape[1]
+    max_tokens = input_length + 180
+
+    with torch.no_grad():
+        outputs = models.flan_model.generate(
+            **inputs,
+            do_sample=True,
+            max_length=max_tokens,
+            min_length=100,
+            early_stopping=True,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.9,
+            repetition_penalty=1.2,
+            num_beams=3
+        )
+
+    answer = models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # print(answer)
+    # answer = extract_answer(answer)
+    return answer
+
correct_answer_generation/answer_generation_database_creation.py
ADDED
@@ -0,0 +1,22 @@
+import os
+
+from correct_answer_generation.create_database import create_database_main
+from correct_answer_generation.related_content_creation import doc_creation
+from correct_answer_generation.answer_generation import query_
+
+def database_creation(path):
+    create_database_main(path)
+
+def answer_generation(path, query):
+    # collection_name = os.path.splitext(os.path.basename(path))[0]
+    path = path.replace("/", "_")
+    data = doc_creation(query, path)
+    correct_answers = query_(query, data)
+    return correct_answers
+
+
+# ans = answer_generation("OperatingSystems","What is the process, and how does it differ from a program?")
+# # data = doc_creation(q,"OperatingSystems")
+
+
+
correct_answer_generation/create_database.py
ADDED
@@ -0,0 +1,81 @@
+import fitz  # PyMuPDF
+import re
+import chromadb
+import sys
+import os
+import uuid
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+from all_models import models
+
+def clean_text(text):
+    # Keep only letters, numbers, punctuation, whitespace, and newlines
+    cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,!?;:'\"()\-]", "", text)
+    return cleaned_text
+
+def extract_text_from_pdf(pdf_path):
+    text = ""
+    with fitz.open(pdf_path) as doc:
+        for page in doc:
+            page_text = page.get_text()
+            cleaned_text = clean_text(page_text)
+            text += cleaned_text
+    return text
+
+def clean_data(text):
+    cleaned_text = re.sub(r'\n{2,}', '. \n', text)  # Replace multiple newlines with a single newline
+    cleaned_text = re.sub(r' {2,}', '. \n', cleaned_text)  # Replace multiple spaces with a newline
+    return cleaned_text.strip()  # Strip leading/trailing whitespace
+
+def combine_list(strings):
+    combined_list = []
+    current_combined = ""
+    for string in strings:
+        word_count = len(string.split())
+
+        if len(current_combined.split()) < 20:
+            current_combined += " " + string.strip()  # Adding space before new string
+
+        # If the combined string reaches at least 20 words, add it to the final list
+        if len(current_combined.split()) >= 20:
+            combined_list.append(current_combined)  # Strip to remove leading/trailing whitespace
+            current_combined = ""  # Reset for the next round
+    if current_combined:
+        combined_list.append(current_combined.strip())
+    return combined_list
+
+def create_databse(data, name):
+    # Initialize the Persistent Client
+    client = chromadb.PersistentClient(path="correct_answer_generation/chroma_db")
+
+    collection_names = client.list_collections()
+    if name in collection_names:
+        client.delete_collection(name)  # Delete the old collection
+
+    # Create a Collection
+    collection = client.create_collection(name)
+
+    # Generate embeddings using the singleton model
+    embeddings = models.similarity_model.encode(data, batch_size=32, convert_to_tensor=True)
+
+    # Create documents and add them to the collection
+    unique_id = [str(uuid.uuid4()) for _ in range(len(embeddings))]
+
+    collection.add(
+        documents=data,
+        ids=unique_id
+    )
+
+def create_database_main(path):
+    pdf_path = path
+    pdf_text = extract_text_from_pdf(pdf_path)
+    data = clean_data(pdf_text)
+    data = data.split('. \n')
+    for i in range(len(data)):
+        data[i] = re.sub(r' \n', ' ', data[i])
+        data[i] = re.sub(r'\s+', ' ', data[i])
+    data = [text for text in data if len(text) >= 2]
+    data = combine_list(data)
+
+    path = path.replace("/", "_")
+    create_databse(data, path)
+
correct_answer_generation/related_content_creation.py
ADDED
@@ -0,0 +1,29 @@
+import chromadb
+
+
+def doc_creation(q, collection_name):
+
+    client_ = chromadb.PersistentClient(path="correct_answer_generation/chroma_db")
+
+    # collection = client_.get_collection(name='OperatingSystems')
+    collection = client_.get_collection(name=collection_name)
+
+
+    results = collection.query(
+        query_texts=[q],
+        n_results=7,  # how many results to return
+        include = ['documents']  # newly added
+    )
+
+    data = ""
+    for i in results['documents'][0]:
+        data += " " + i
+        # print(i)
+    return data
+
+# q = "What is the difference between a process and a program?"
+# data = doc_creation(q,"OperatingSystems")
+
+
+
+
main.py
ADDED
@@ -0,0 +1,217 @@
+from flask import Flask, request, jsonify, render_template
+import os
+import json
+import torch
+from werkzeug.utils import secure_filename
+
+from HTR.app import extract_text_from_image
+
+from correct_answer_generation.answer_generation_database_creation import database_creation, answer_generation
+
+
+from similarity_check.tf_idf.tf_idf_score import create_tfidf_values, tfidf_answer_score
+from similarity_check.semantic_meaning_check.semantic import similarity_model_score, fasttext_similarity, question_vector_sentence, question_vector_word
+from similarity_check.llm_based_scoring.llm import llm_score
+
+app = Flask(__name__)
+
+UPLOAD_FOLDER = 'uploads'
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+
+
+def new_value(value, old_min, old_max, new_min, new_max):
+    new_value = new_min + ((value - old_min) * (new_max - new_min)) / (old_max - old_min)
+    return new_value
+
+@app.route('/compute_answers', methods=['POST'])
+def compute_answers():
+    query_file = request.files.get('query_file')
+    if not query_file:
+        return jsonify({"error": "Missing query file"}), 400
+
+    queries = query_file.read().decode('utf-8').splitlines()
+    # print(queries)
+    file_type = request.form.get('file_type')
+    ans_csv_file = request.files.get('ans_csv_file')
+
+    if file_type == "csv":
+        ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
+        c_answers = []
+        # print(ans_csv_file)
+        for i in ans_csv_file:
+            c_answers.append(i.split('\\n'))
+        # print(c_answers)
+        return jsonify({"answers": c_answers}), 200
+
+
+    try:
+        c_answers = []
+
+        if file_type == "csv":
+            # Process answer CSV file
+            answers = ans_csv_file.read().decode('utf-8').splitlines()
+            # print(answers)
+            # Implement CSV processing logic
+            c_answers = [f"Processed query: {query}" for query in queries]
+
+        elif file_type == 'pdf':
+
+            for query in queries:
+                folder_path = 'Knowledge_Retriever_pdf'
+
+                pdf_files = [f"{folder_path}/{file}" for file in os.listdir(folder_path) if file.endswith('.pdf')]
+
+            for i in pdf_files:
+                database_creation(i)
+
+            for i in queries:
+                ans = []
+                for j in pdf_files:
+                    ans.append(answer_generation(j, i))
+                c_answers.append(ans)
+
+        else:
+            return jsonify({"error": "Unsupported file type"}), 400
+        # print(c_answers)
+        return jsonify({"answers": c_answers}), 200
+
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+
+@app.route('/compute_marks', methods=['POST'])
+def compute_marks():
+    try:
+        a = request.form.get('answers')
+        a = json.loads(a)
+        answers = []
+        for i in a:
+            ans = i.split('\n\n')
+            answers.append(ans)
+
+        # Create temporary directory for uploaded files
+        temp_folder = os.path.join('uploads', 'temp_answers')
+        os.makedirs(temp_folder, exist_ok=True)
+
+        # Process uploaded files
+        files = request.files.getlist('files[]')
+        data = {}
+
+        for file in files:
+            if file.filename.endswith(('.jpg', '.jpeg', '.png')):
+                # Get the relative path from the uploaded folder structure
+                relative_path = file.filename
+
+                # Extract student folder name (first directory in path)
+                path_parts = relative_path.split('/')
+                if len(path_parts) >= 2:
+                    student_folder = path_parts[0]
+
+                    # Create student directory if it doesn't exist
+                    student_path = os.path.join(temp_folder, student_folder)
+                    os.makedirs(student_path, exist_ok=True)
+
+                    # Save the file
+                    save_path = os.path.join(temp_folder, relative_path)
+                    os.makedirs(os.path.dirname(save_path), exist_ok=True)
+                    file.save(save_path)
+
+                    # Store file path in data dictionary
+                    if student_folder in data:
+                        data[student_folder].append(save_path)
+                    else:
+                        data[student_folder] = [save_path]
+
+        # Sort files for each student
+        for student in data:
+            data[student].sort()  # This will sort the file paths alphabetically
+
+        # Rest of your existing marking logic
+        s_marks = {}
+        sen_vec_answers = []
+        word_vec_answers = []
+
+        for i in answers:
+            temp_v = []
+            temp_w = []
+            for j in i:
+                temp_v.append(question_vector_sentence(j))
+                temp_w.append(question_vector_word(j))
+            sen_vec_answers.append(temp_v)
+            word_vec_answers.append(temp_w)
+
+        for i in data:
+            s_marks[i] = []
+            count = 0
+            for j in data[i]:
+                image_path = j
+                s_answer = extract_text_from_image(image_path)
+                tf_idf_word_values, max_tfidf = create_tfidf_values(answers[count])
+                m = marks(s_answer, sen_vec_answers[count], word_vec_answers[count],
+                          tf_idf_word_values, max_tfidf, answers[count])
+                if isinstance(m, torch.Tensor):
+                    m = m.item()
+                s_marks[i].append(m)
+                count += 1
+
+        # Cleanup temporary files
+        import shutil
+        shutil.rmtree(temp_folder)
+
+        return jsonify({"message": s_marks}), 200
+
+    except Exception as e:
+        return jsonify({"error": str(e)}), 500
+
+
+
+def marks(answer, sen_vec_answers, word_vec_answers, tf_idf_word_values, max_tfidf, correct_answers):
+    marks = 0
+    marks1 = tfidf_answer_score(answer, tf_idf_word_values, max_tfidf, marks=10)
+
+    if marks1 > 3:
+        marks += new_value(marks1, old_min=3, old_max=10, new_min=0, new_max=5)
+    # print("TFIDF Score",float(marks))
+
+    if marks1 > 2:
+        marks2 = similarity_model_score(sen_vec_answers, answer)
+        a = 0
+        if marks2 > 0.95:
+            marks += 3
+            a = a + 3
+        elif marks2 > 0.5:
+            marks += new_value(marks2, old_min=0.5, old_max=0.95, new_min=0, new_max=3)
+            a = a + new_value(marks2, old_min=0.5, old_max=0.95, new_min=0, new_max=3)
+        # print("sentence-transformers/all-MiniLM-L6-v2 with Cosine Similarity",a)
+
+        marks3 = fasttext_similarity(word_vec_answers, answer)
+        b = 0
+        if marks2 > 0.9:
+            marks += 2
+            b = b + 2
+        elif marks3 > 0.4:
+            marks += new_value(marks3, old_min=0.4, old_max=0.9, new_min=0, new_max=2)
+            b = b + new_value(marks3, old_min=0.4, old_max=0.9, new_min=0, new_max=2)
+        # print("fasttext-wiki-news-subwords-300 with Soft Cosine Similarity",b)
+
+        marks4 = llm_score(correct_answers, answer)
+        for i in range(len(marks4)):
+            marks4[i] = float(marks4[i])
+
+        m = max(marks4)
+        # print("llm score",m/2)
+        marks = marks/2 + m/2
+
+    return marks
+
+
+
+
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=7860)
models/vit-base-beans/all_results.json
ADDED
@@ -0,0 +1,13 @@
+{
+    "epoch": 2.2535211267605635,
+    "eval_loss": 0.011280356906354427,
+    "eval_precision": 1.0,
+    "eval_runtime": 120.2175,
+    "eval_samples_per_second": 2.096,
+    "eval_steps_per_second": 0.133,
+    "total_flos": 1.9806952545489715e+17,
+    "train_loss": 0.1075204591266811,
+    "train_runtime": 5558.7629,
+    "train_samples_per_second": 4.08,
+    "train_steps_per_second": 0.255
+}
models/vit-base-beans/config.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "_name_or_path": "vitpre",
+  "architectures": [
+    "ViTForImageClassification"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "encoder_stride": 16,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.0,
+  "hidden_size": 768,
+  "image_size": 224,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "model_type": "vit",
+  "num_attention_heads": 12,
+  "num_channels": 3,
+  "num_hidden_layers": 12,
+  "patch_size": 16,
+  "problem_type": "single_label_classification",
+  "qkv_bias": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2"
+}
models/vit-base-beans/eval_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 2.2535211267605635,
+    "eval_loss": 0.011280356906354427,
+    "eval_precision": 1.0,
+    "eval_runtime": 120.2175,
+    "eval_samples_per_second": 2.096,
+    "eval_steps_per_second": 0.133
+}
models/vit-base-beans/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b75581f71443ac2edbb7a6b087a3bb52dd5da0df124ba42b2da04a4c257466f
+size 343223968
models/vit-base-beans/runs/Jun06_19-03-39_DELL/events.out.tfevents.1717715031.DELL.3528.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3365ccc0e41eb48d34859835f9df2f0ffe01b2e26801fc353474dd62e9b396b
+size 13489
models/vit-base-beans/runs/Jun06_19-03-39_DELL/events.out.tfevents.1717720711.DELL.3528.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:677f7fa63a4d50f27d37f5ab5aaf576d62ce8317a5e21a2889dc291b79b3e1c4
+size 412
models/vit-base-beans/train_results.json
ADDED
@@ -0,0 +1,8 @@
+{
+    "epoch": 2.2535211267605635,
+    "total_flos": 1.9806952545489715e+17,
+    "train_loss": 0.1075204591266811,
+    "train_runtime": 5558.7629,
+    "train_samples_per_second": 4.08,
+    "train_steps_per_second": 0.255
+}
models/vit-base-beans/trainer_state.json
ADDED
@@ -0,0 +1,307 @@
+{
+  "best_metric": 0.011280356906354427,
+  "best_model_checkpoint": "./vit-base-beans\\checkpoint-130",
+  "epoch": 2.2535211267605635,
+  "eval_steps": 10,
+  "global_step": 160,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.14084507042253522,
+      "grad_norm": 3.220585584640503,
+      "learning_rate": 0.00019859154929577466,
+      "loss": 0.4675,
+      "step": 10
+    },
+    {
+      "epoch": 0.14084507042253522,
+      "eval_loss": 0.14601898193359375,
+      "eval_precision": 0.9767441860465116,
+      "eval_runtime": 115.8763,
+      "eval_samples_per_second": 2.175,
+      "eval_steps_per_second": 0.138,
+      "step": 10
+    },
+    {
+      "epoch": 0.28169014084507044,
+      "grad_norm": 0.3653267025947571,
+      "learning_rate": 0.0001971830985915493,
+      "loss": 0.2185,
+      "step": 20
+    },
+    {
+      "epoch": 0.28169014084507044,
+      "eval_loss": 0.18865476548671722,
+      "eval_precision": 0.9264705882352942,
+      "eval_runtime": 119.7761,
+      "eval_samples_per_second": 2.104,
+      "eval_steps_per_second": 0.134,
+      "step": 20
+    },
+    {
+      "epoch": 0.4225352112676056,
+      "grad_norm": 0.9396668672561646,
+      "learning_rate": 0.00019577464788732396,
+      "loss": 0.1316,
+      "step": 30
+    },
+    {
+      "epoch": 0.4225352112676056,
+      "eval_loss": 0.061195846647024155,
+      "eval_precision": 1.0,
+      "eval_runtime": 119.7047,
+      "eval_samples_per_second": 2.105,
+      "eval_steps_per_second": 0.134,
+      "step": 30
+    },
+    {
+      "epoch": 0.5633802816901409,
+      "grad_norm": 1.195994257926941,
+      "learning_rate": 0.00019436619718309861,
+      "loss": 0.081,
+      "step": 40
+    },
+    {
+      "epoch": 0.5633802816901409,
+      "eval_loss": 0.26516667008399963,
+      "eval_precision": 1.0,
+      "eval_runtime": 119.2044,
+      "eval_samples_per_second": 2.114,
+      "eval_steps_per_second": 0.134,
+      "step": 40
+    },
+    {
+      "epoch": 0.704225352112676,
+      "grad_norm": 13.78415584564209,
+      "learning_rate": 0.00019295774647887326,
+      "loss": 0.0995,
+      "step": 50
+    },
+    {
+      "epoch": 0.704225352112676,
+      "eval_loss": 0.14177252352237701,
+      "eval_precision": 0.968,
+      "eval_runtime": 120.7206,
+      "eval_samples_per_second": 2.087,
+      "eval_steps_per_second": 0.133,
+      "step": 50
+    },
+    {
+      "epoch": 0.8450704225352113,
+      "grad_norm": 5.696216106414795,
+      "learning_rate": 0.0001915492957746479,
+      "loss": 0.2069,
+      "step": 60
+    },
+    {
+      "epoch": 0.8450704225352113,
+      "eval_loss": 0.03753811493515968,
+      "eval_precision": 0.984375,
+      "eval_runtime": 122.9296,
+      "eval_samples_per_second": 2.05,
+      "eval_steps_per_second": 0.13,
+      "step": 60
+    },
+    {
+      "epoch": 0.9859154929577465,
+      "grad_norm": 3.0282273292541504,
+      "learning_rate": 0.00019014084507042254,
+      "loss": 0.0379,
+      "step": 70
+    },
+    {
+      "epoch": 0.9859154929577465,
+      "eval_loss": 0.07843092083930969,
+      "eval_precision": 0.9541984732824428,
+      "eval_runtime": 124.2593,
+      "eval_samples_per_second": 2.028,
+      "eval_steps_per_second": 0.129,
+      "step": 70
+    },
+    {
+      "epoch": 1.1267605633802817,
+      "grad_norm": 5.544398784637451,
+      "learning_rate": 0.0001887323943661972,
+      "loss": 0.1098,
+      "step": 80
+    },
+    {
+      "epoch": 1.1267605633802817,
+      "eval_loss": 0.11345015466213226,
+      "eval_precision": 0.9264705882352942,
+      "eval_runtime": 122.7101,
+      "eval_samples_per_second": 2.054,
+      "eval_steps_per_second": 0.13,
+      "step": 80
+    },
+    {
+      "epoch": 1.267605633802817,
+      "grad_norm": 2.5479578971862793,
+      "learning_rate": 0.00018732394366197184,
+      "loss": 0.0958,
+      "step": 90
+    },
+    {
+      "epoch": 1.267605633802817,
+      "eval_loss": 0.12344790995121002,
+      "eval_precision": 0.9130434782608695,
+      "eval_runtime": 177.1319,
+      "eval_samples_per_second": 1.423,
+      "eval_steps_per_second": 0.09,
+      "step": 90
+    },
+    {
+      "epoch": 1.408450704225352,
+      "grad_norm": 0.5559585690498352,
+      "learning_rate": 0.0001859154929577465,
+      "loss": 0.0762,
+      "step": 100
+    },
+    {
+      "epoch": 1.408450704225352,
+      "eval_loss": 0.07210251688957214,
+      "eval_precision": 1.0,
+      "eval_runtime": 124.2559,
+      "eval_samples_per_second": 2.028,
+      "eval_steps_per_second": 0.129,
+      "step": 100
+    },
+    {
+      "epoch": 1.5492957746478875,
+      "grad_norm": 0.07416976243257523,
+      "learning_rate": 0.00018450704225352114,
+      "loss": 0.0088,
+      "step": 110
+    },
+    {
+      "epoch": 1.5492957746478875,
+      "eval_loss": 0.01765807531774044,
+      "eval_precision": 1.0,
+      "eval_runtime": 214.9118,
+      "eval_samples_per_second": 1.173,
+      "eval_steps_per_second": 0.074,
+      "step": 110
+    },
+    {
+      "epoch": 1.6901408450704225,
+      "grad_norm": 0.0715404525399208,
+      "learning_rate": 0.0001830985915492958,
+      "loss": 0.0085,
+      "step": 120
+    },
+    {
+      "epoch": 1.6901408450704225,
+      "eval_loss": 0.14363093674182892,
+      "eval_precision": 0.9333333333333333,
+      "eval_runtime": 121.1058,
+      "eval_samples_per_second": 2.081,
+      "eval_steps_per_second": 0.132,
+      "step": 120
+    },
+    {
+      "epoch": 1.8309859154929577,
+      "grad_norm": 0.03990806266665459,
+      "learning_rate": 0.00018169014084507045,
+      "loss": 0.0071,
+      "step": 130
+    },
+    {
+      "epoch": 1.8309859154929577,
+      "eval_loss": 0.011280356906354427,
+      "eval_precision": 1.0,
+      "eval_runtime": 120.5316,
+      "eval_samples_per_second": 2.091,
+      "eval_steps_per_second": 0.133,
+      "step": 130
+    },
+    {
+      "epoch": 1.971830985915493,
+      "grad_norm": 0.03454509377479553,
+      "learning_rate": 0.00018028169014084507,
+      "loss": 0.0155,
+      "step": 140
+    },
+    {
+      "epoch": 1.971830985915493,
+      "eval_loss": 0.10382802784442902,
+      "eval_precision": 0.9545454545454546,
+      "eval_runtime": 121.0415,
+      "eval_samples_per_second": 2.082,
+      "eval_steps_per_second": 0.132,
+      "step": 140
+    },
+    {
+      "epoch": 2.112676056338028,
+      "grad_norm": 0.03133228421211243,
+      "learning_rate": 0.00017887323943661972,
+      "loss": 0.069,
+      "step": 150
+    },
+    {
+      "epoch": 2.112676056338028,
+      "eval_loss": 0.05120203271508217,
+      "eval_precision": 1.0,
+      "eval_runtime": 121.432,
+      "eval_samples_per_second": 2.075,
+      "eval_steps_per_second": 0.132,
+      "step": 150
+    },
+    {
+      "epoch": 2.2535211267605635,
+      "grad_norm": 0.033982835710048676,
+      "learning_rate": 0.00017746478873239437,
+      "loss": 0.0866,
+      "step": 160
+    },
+    {
+      "epoch": 2.2535211267605635,
+      "eval_loss": 0.0187800545245409,
+      "eval_precision": 0.9921259842519685,
+      "eval_runtime": 123.5401,
+      "eval_samples_per_second": 2.04,
+      "eval_steps_per_second": 0.13,
+      "step": 160
+    },
+    {
+      "epoch": 2.2535211267605635,
+      "step": 160,
+      "total_flos": 1.9806952545489715e+17,
+      "train_loss": 0.1075204591266811,
+      "train_runtime": 5558.7629,
+      "train_samples_per_second": 4.08,
+      "train_steps_per_second": 0.255
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 1420,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.9806952545489715e+17,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
models/vit-base-beans/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e900ee99fbfc723afe73ab030c236d4f1d59bb636d495f08883046cfa74531c4
+size 5048
similarity_check/llm_based_scoring/llm.py
ADDED
@@ -0,0 +1,76 @@
+import torch
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+from all_models import models
+
+# Remove these lines since we're using the singleton
+# MODEL_NAME = "google/flan-t5-xl"
+# model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+models.flan_model.to(device)
+
+def llm_score(correct_answers, answer):
+    score = []
+
+    for correct_answer in correct_answers:
+        print(correct_answer)
+        print(answer)
+        print()
+        print()
+        prompt = (
+            "You are an expert evaluator of answers. Your response must be a *single numeric score (0-10), not a range.*\n\n"
+
+            "The user's answer has been converted from handwriting using OCR, so minor spelling, punctuation, or small word variations may exist. "
+            "Focus on meaning rather than transcription errors.\n\n"
+
+            "### Evaluation Criteria:\n"
+            "- *Correctness (90% weight):* Does the answer accurately convey the meaning of the correct answer?\n"
+            "- *Completeness (10% weight):* Does it cover all key points?\n\n"
+
+            "### Handling OCR Errors:\n"
+            "- Ignore minor spelling/punctuation mistakes that don't affect meaning.\n"
+            "- Penalize only if word substitutions change the meaning.\n\n"
+
+            "### Scoring Guidelines:\n"
+            "- *10:* Fully correct and complete (90-100% accurate).\n"
+            "- *From 9 to 8:* Mostly correct, minor missing details (80-90% accurate).\n"
+            "- *From 7 to 6:* Good but missing some key points (60-80% accurate).\n"
+            "- *From 5 to 4:* Average, with several omissions/errors (40-60% accurate).\n"
+            "- *From 3 to 2:* Poor, major meaning errors (20-40% accurate).\n"
+            "- *From 1 to 0:* Incorrect or irrelevant (less than 20% accurate).\n\n"
+
+            "Compare the answers and assign a *single numeric score (0-10)* based on correctness and completeness.\n\n"
+
+            "Correct answer:\n"
+            f"{correct_answer}\n\n"
+            "User's answer:\n"
+            f"{answer}\n\n"
+            "Final Score (numeric only, strictly between 0 and 10):")
+
+        # Tokenize input prompt
+        inputs = models.flan_tokenizer(prompt, return_tensors="pt").to(device)
+
+        # Generate response
+        with torch.no_grad():
+            outputs = models.flan_model.generate(
+                **inputs,
+                max_length=2048,
+                do_sample=True,
+                num_return_sequences=1,
+                num_beams=5,
+                temperature=0.6,
+                top_p=0.9,
+                early_stopping=True,
+                pad_token_id=models.flan_tokenizer.pad_token_id,
+                eos_token_id=models.flan_tokenizer.eos_token_id,
+                bos_token_id=models.flan_tokenizer.bos_token_id,
+            )
+
+        # Decode and print response
+        print(models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True))
+        score.append(models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True))
+
+    return score
similarity_check/semantic_meaning_check/semantic.py
ADDED
@@ -0,0 +1,82 @@
+from sentence_transformers import util
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from gensim.models import KeyedVectors
+import numpy as np
+import nltk
+from gensim import corpora
+from gensim.models import FastText
+from gensim.similarities import SparseTermSimilarityMatrix, WordEmbeddingSimilarityIndex
+from gensim.downloader import load
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+from all_models import models
+
+# Keep fasttext as is
+fasttext = load('fasttext-wiki-news-subwords-300')
+
+# nltk.download('punkt')
+# nltk.download('stopwords')
+
+def question_vector_sentence(correct_answer):
+    return models.similarity_model.encode(correct_answer, convert_to_tensor=True)
+
+def similarity_model_score(correct_answer_vector, answer):
+    answer_embedding = models.similarity_model.encode(answer, convert_to_tensor=True)
+    cosine_score = float('-inf')
+    for i in correct_answer_vector:
+        cosine_score = max(cosine_score, util.pytorch_cos_sim(i, answer_embedding))
+    return cosine_score
+
+def preprocess(sentence):
+    # Lowercase and remove punctuation
+    sentence = sentence.lower()
+    # Tokenize
+    words = word_tokenize(sentence)
+    # Remove stop words
+    words = [word for word in words if word not in stopwords.words('english')]
+    return words
+
+def sentence_to_vec(tokens, model):
+    # Filter words that are in the Word2Vec vocabulary
+    valid_words = [word for word in tokens if word in model]
+
+    # If there are no valid words, return a zero vector
+    if not valid_words:
+        return np.zeros(model.vector_size)
+
+    # Compute the average vector
+    word_vectors = [model[word] for word in valid_words]
+    sentence_vector = np.mean(word_vectors, axis=0)
+
+    return sentence_vector
+
+def compute_scm(tokens1, tokens2, model):
+    dictionary = corpora.Dictionary([tokens1, tokens2])
+    tokens1 = dictionary.doc2bow(tokens1)
+    tokens2 = dictionary.doc2bow(tokens2)
+    termsim_index = WordEmbeddingSimilarityIndex(model)
+    termsim_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary)
+    similarity = termsim_matrix.inner_product(tokens1, tokens2, normalized=(True, True))
+    return similarity
+
+def question_vector_word(correct_answer):
+    return preprocess(correct_answer)
+
+def fasttext_similarity(correct_answer_vector, answer):
+    preprocess_answer = preprocess(answer)
+    soft_cosine = float('-inf')
+
+    for i in correct_answer_vector:
+        soft_cosine = max(compute_scm(i, preprocess_answer, fasttext), soft_cosine)
+
+    return soft_cosine
+
similarity_check/tf_idf/tf_idf_score.py
ADDED
@@ -0,0 +1,142 @@
+import nltk
+from nltk.corpus import stopwords
+from nltk.tokenize import word_tokenize
+from nltk.corpus import wordnet
+from collections import Counter
+import string
+
+# Check and download required NLTK packages
+try:
+    stopwords.words('english')
+except LookupError:
+    print("Downloading required NLTK data...")
+    nltk.download('stopwords')
+    nltk.download('punkt')
+    nltk.download('wordnet')
+
+def remove_stopwords(sentence):
+
+    # converting into words
+    words = word_tokenize(sentence)
+
+    # Get the set of English stop words
+    stop_words = set(stopwords.words('english'))
+
+    # Remove stop words from the list of words
+    filtered_words = [word for word in words if word.lower() not in stop_words]
+
+    words = [word.lower() for word in words if word.isalpha() and len(word) > 1]
+
+    return words
+
+def get_synonyms(word):
+    synonyms = set()
+    for syn in wordnet.synsets(word):
+        for lemma in syn.lemmas():
+            synonyms.add(lemma.name().lower())
+    return synonyms
+
+
+def process_sentence(words):
+
+    # Find synonyms for each word
+    synonym_map = {}
+    for word in words:
+        synonyms = get_synonyms(word)
+        synonyms.add(word)  # Ensure the word itself is included if no synonyms are found
+        synonym_map[word] = list(synonyms)
+
+    return synonym_map
+
+def tf(dict1):
+    # print(dict1)
+    no_of_terms_in_document = len(dict1)
+    word_frequency = {}
+    for i in dict1:
+        count = 0
+        for j in dict1:
+            if i in dict1[j]:
+                count += 1
+        word_frequency[i] = count
+    # print(word_frequency)
+
+    for i in word_frequency:
+        word_frequency[i] = word_frequency[i]/no_of_terms_in_document
+
+    return word_frequency
+
+def idf(di):
+    no_of_documents = len(di)
+    new_dict = {}
+    for d in range(len(di)):
+        for i in di[d]:
+            if i not in new_dict:
+                new_dict[i] = set()
+                new_dict[i].add(d)
+            else:
+                new_dict[i].add(d)
+
+    r = {}
+    for i in new_dict:
+        r[i] = len(new_dict[i])/no_of_documents
+    return r
+
+def total_tf_idf_value(tf_idf_word_values, synonyms_words):
+    value = 0
+    for i in synonyms_words:
+        for j in synonyms_words[i]:
+            if j in tf_idf_word_values:
+                value += tf_idf_word_values[j]
+                break
+    return value
+
+
+def create_tfidf_values(correct_answer):
+    correct_answer_words = []
+    for i in correct_answer:
+        correct_answer_words.append(remove_stopwords(i))
+
+    correct_synonyms_words = []
+
+    for i in correct_answer_words:
+        correct_synonyms_words.append(process_sentence(i))
+
+    tf_ = []
+    for i in correct_synonyms_words:
+        tf_.append(tf(i))
+
+
+    idf_values = idf(correct_synonyms_words)
+
+    tf_idf_word_values = {}
+    count = 0
+    for correct_synonyms_word in correct_synonyms_words:
+        for i in correct_synonyms_word:
+            value = tf_[count][i]*idf_values[i]
+            if i in tf_idf_word_values:
+                tf_idf_word_values[i] = max(tf_idf_word_values[i], value)
+            else:
+                tf_idf_word_values[i] = value
+        count += 1
+    for i in tf_idf_word_values:
+        tf_idf_word_values[i] = round(tf_idf_word_values[i], 4)
+
+    tfidf_correct_ans = float('inf')
+    for i in correct_synonyms_words:
+        tfidf_correct_ans = min(total_tf_idf_value(tf_idf_word_values, i), tfidf_correct_ans)
tfidf_correct_ans = min(total_tf_idf_value(tf_idf_word_values,i),tfidf_correct_ans)
|
127 |
+
|
128 |
+
return tf_idf_word_values,tfidf_correct_ans
|
129 |
+
|
130 |
+
|
131 |
+
def tfidf_answer_score(answer,tf_idf_word_values,max_tfidf,marks=10):
|
132 |
+
answer = remove_stopwords(answer)
|
133 |
+
answer_synonyms_words = process_sentence(answer)
|
134 |
+
value = total_tf_idf_value(tf_idf_word_values,answer_synonyms_words)
|
135 |
+
# print("tfidf value of answer: ",value, " , " "minimum tfidf value of correct answer answer: " ,max_tfidf)
|
136 |
+
score = (value/max_tfidf)*marks
|
137 |
+
# print(score)
|
138 |
+
if score>10:
|
139 |
+
return 10
|
140 |
+
else:
|
141 |
+
return score
|
142 |
+
|
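A short usage sketch of the TF-IDF scorer above, illustrative only and not part of the commit. It assumes the NLTK corpora (punkt, stopwords, wordnet) are available locally; the reference and student answers are invented.

# usage sketch for tf_idf_score.py (illustrative, not part of the commit)
from similarity_check.tf_idf.tf_idf_score import create_tfidf_values, tfidf_answer_score  # assumed import path

correct_answers = [
    "The mitochondrion is the powerhouse of the cell.",
    "Mitochondria generate most of the cell's ATP.",
]

# Build synonym-expanded TF-IDF weights from the reference answers once...
tf_idf_word_values, max_tfidf = create_tfidf_values(correct_answers)

# ...then score each student answer against them on a 0-10 scale.
score = tfidf_answer_score("Mitochondria produce ATP, the cell's energy currency.",
                           tf_idf_word_values, max_tfidf, marks=10)
print(round(score, 2))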
templates/index.html
ADDED
@@ -0,0 +1,367 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Answer Generation</title>
    <!-- Add Google Fonts -->
    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600&display=swap" rel="stylesheet">
    <style>
        :root {
            --primary-color: #4361ee;
            --secondary-color: #3f37c9;
            --accent-color: #4895ef;
            --background-color: #f8f9fa;
            --text-color: #2b2d42;
            --border-radius: 8px;
            --box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
        }

        body {
            font-family: 'Poppins', sans-serif;
            margin: 0;
            padding: 2rem;
            background-color: var(--background-color);
            color: var(--text-color);
            line-height: 1.6;
        }

        .container {
            max-width: 1200px;
            margin: 0 auto;
            padding: 2rem;
            background: white;
            border-radius: var(--border-radius);
            box-shadow: var(--box-shadow);
        }

        h2 {
            color: var(--primary-color);
            margin-bottom: 1.5rem;
            font-weight: 600;
            position: relative;
            padding-bottom: 0.5rem;
        }

        h2::after {
            content: '';
            position: absolute;
            bottom: 0;
            left: 0;
            width: 50px;
            height: 3px;
            background-color: var(--accent-color);
            border-radius: 2px;
        }

        .section {
            background: white;
            padding: 1.5rem;
            border-radius: var(--border-radius);
            margin-bottom: 2rem;
            box-shadow: var(--box-shadow);
        }

        .upload-container {
            margin-bottom: 1.5rem;
        }

        label {
            display: block;
            margin-bottom: 0.5rem;
            font-weight: 500;
            color: var(--text-color);
        }

        input[type="file"] {
            width: 100%;
            padding: 0.5rem;
            margin-bottom: 1rem;
            border: 2px dashed var(--accent-color);
            border-radius: var(--border-radius);
            background: #f8f9fa;
            cursor: pointer;
        }

        input[type="file"]:hover {
            border-color: var(--primary-color);
        }

        select {
            width: 100%;
            padding: 0.8rem;
            border: 1px solid #ddd;
            border-radius: var(--border-radius);
            margin-bottom: 1rem;
            font-family: 'Poppins', sans-serif;
            appearance: none;
            background: white url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='%23444' viewBox='0 0 16 16'%3E%3Cpath d='M8 12L2 6h12z'/%3E%3C/svg%3E") no-repeat right 0.8rem center;
        }

        button {
            background-color: var(--primary-color);
            color: white;
            border: none;
            padding: 0.8rem 1.5rem;
            border-radius: var(--border-radius);
            cursor: pointer;
            font-weight: 500;
            transition: all 0.3s ease;
            font-family: 'Poppins', sans-serif;
            width: 100%;
            margin-bottom: 1rem;
        }

        button:hover {
            background-color: var(--secondary-color);
            transform: translateY(-2px);
            box-shadow: 0 4px 12px rgba(67, 97, 238, 0.3);
        }

        .answer-box {
            width: 100%;
            min-height: 100px;
            padding: 1rem;
            margin-bottom: 1rem;
            border: 1px solid #ddd;
            border-radius: var(--border-radius);
            font-family: 'Poppins', sans-serif;
            resize: vertical;
            transition: border-color 0.3s ease;
        }

        .answer-box:focus {
            outline: none;
            border-color: var(--accent-color);
            box-shadow: 0 0 0 3px rgba(72, 149, 239, 0.2);
        }

        table {
            width: 100%;
            border-collapse: separate;
            border-spacing: 0;
            margin-top: 1.5rem;
            background: white;
            border-radius: var(--border-radius);
            overflow: hidden;
            box-shadow: var(--box-shadow);
        }

        th, td {
            padding: 1rem;
            text-align: left;
            border-bottom: 1px solid #eee;
        }

        th {
            background-color: var(--primary-color);
            color: white;
            font-weight: 500;
        }

        tr:hover {
            background-color: #f8f9fa;
        }

        .hidden {
            display: none;
        }

        /* Responsive Design */
        @media (max-width: 768px) {
            body {
                padding: 1rem;
            }

            .container {
                padding: 1rem;
            }

            button {
                padding: 0.7rem 1rem;
            }
        }

        /* Animation */
        @keyframes fadeIn {
            from { opacity: 0; transform: translateY(10px); }
            to { opacity: 1; transform: translateY(0); }
        }

        .section {
            animation: fadeIn 0.5s ease-out;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="section">
            <h2>Upload Query CSV File</h2>
            <div id="query-upload">
                <label for="query-file">Query File:</label>
                <div class="upload-container">
                    <input type="file" id="query-file" accept=".csv">
                </div>
            </div>
        </div>

        <div class="section">
            <h2>Answer Generation</h2>
            <label for="file-type">Select File Type:</label>
            <select id="file-type" onchange="handleFileTypeChange()">
                <option value="pdf">PDF</option>
                <option value="csv">CSV</option>
            </select>

            <div id="csv-upload" class="hidden">
                <label for="csv-file">Upload Answer CSV File:</label>
                <div class="upload-container">
                    <input type="file" id="csv-file" accept=".csv">
                </div>
            </div>

            <button id="compute-btn" onclick="computeAnswers()">Compute Answers</button>
        </div>

        <div class="section">
            <h2>Student Answers Upload</h2>
            <label for="folder-upload">Upload Student Answers Folder:</label>
            <div class="upload-container">
                <input type="file" id="folder-upload" webkitdirectory directory multiple>
                <small class="help-text">Select the folder containing student answer images</small>
            </div>
        </div>

        <div class="section">
            <div id="answers-container"></div>
            <button id="compute-marks-btn" onclick="computeMarks()">Compute Marks</button>
            <div id="marks-table-container"></div>
        </div>
    </div>

    <script>
        function handleFileTypeChange() {
            const fileType = document.getElementById('file-type').value;
            const csvUpload = document.getElementById('csv-upload');
            if (fileType === 'csv') {
                csvUpload.classList.remove('hidden'); // Show the CSV upload section
            } else {
                csvUpload.classList.add('hidden'); // Hide the CSV upload section
            }
        }

        async function computeAnswers() {
            try {
                const fileType = document.getElementById('file-type').value;
                const queryfile = document.getElementById('query-file').files[0];
                const anscsvFile = document.getElementById('csv-file').files[0];

                const formData = new FormData();
                formData.append('file_type', fileType);
                formData.append('query_file', queryfile);
                if (anscsvFile) {
                    formData.append('ans_csv_file', anscsvFile);
                }

                const response = await fetch('/compute_answers', { method: 'POST', body: formData });
                const result = await response.json();
                if (result.answers) {
                    displayAnswers(result.answers);
                } else {
                    console.error('No answers received:', result);
                }
            } catch (error) {
                console.error('Error:', error);
            }
        }

        function displayAnswers(answers) {
            const container = document.getElementById('answers-container');
            container.innerHTML = ''; // Clear previous answers

            answers.forEach(answer => {
                const textBox = document.createElement('textarea');
                textBox.className = 'answer-box';
                textBox.value = answer.join('\n\n'); // Set the answer as the value of the text box
                container.appendChild(textBox);
            });
        }

        async function computeMarks() {
            try {
                const answerBoxes = document.querySelectorAll('.answer-box');
                const answers = answerBoxes.length === 1 ? [answerBoxes[0].value.trim()] : Array.from(answerBoxes).map(box => box.value.trim());

                // Create FormData and append answers
                const formData = new FormData();
                formData.append('answers', JSON.stringify(answers));

                // Handle folder upload
                const folderInput = document.getElementById('folder-upload');
                const files = folderInput.files;

                // Append each file with its relative path
                for (let i = 0; i < files.length; i++) {
                    const file = files[i];
                    const relativePath = file.webkitRelativePath;
                    formData.append('files[]', file, relativePath);
                }

                const response = await fetch('/compute_marks', {
                    method: 'POST',
                    body: formData
                });
                const result = await response.json();

                if (result) {
                    displayMarksTable(result);
                } else {
                    console.error('No marks data received:', result);
                }
            } catch (error) {
                console.error('Error:', error);
            }
        }

        function displayMarksTable(data) {
            // Access the 'message' property which contains the actual marks data
            const marksData = data.message;

            console.log('Marks Data Received:', marksData);

            const container = document.getElementById('marks-table-container');
            container.innerHTML = ''; // Clear previous table

            // Create table
            const table = document.createElement('table');
            table.innerHTML = `
                <tr>
                    <th>Name</th>
                    <th>Question Number</th>
                    <th>Marks</th>
                </tr>
            `;

            for (const [name, marks] of Object.entries(marksData)) {
                if (!Array.isArray(marks)) {
                    console.error(`Invalid marks for ${name}:`, marks);
                    continue; // Skip invalid entries
                }

                marks.forEach((mark, index) => {
                    const row = document.createElement('tr');
                    row.innerHTML = `
                        <td>${name}</td>
                        <td>${index + 1}</td>
                        <td>${mark.toFixed(2)}</td>
                    `;
                    table.appendChild(row);
                });
            }

            container.appendChild(table);
        }
    </script>
</body>
</html>
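The fetch calls in this template imply a response contract for the backend: /compute_answers should return a JSON object with an "answers" list containing one list of candidate answers per question, and /compute_marks should return a "message" object mapping each student to a list of numeric marks. The sketch below is only an illustration of those shapes under the assumption of a Flask backend; the real routes live in main.py and are not reproduced here.

# response_shapes.py (illustrative stubs, not part of the commit)
from flask import Flask, jsonify

app = Flask(__name__)

@app.route('/compute_answers', methods=['POST'])
def compute_answers_stub():
    # One list of generated answers per question, as displayAnswers() expects.
    return jsonify({"answers": [["Generated answer for question 1"],
                                ["Generated answer for question 2"]]})

@app.route('/compute_marks', methods=['POST'])
def compute_marks_stub():
    # Student name mapped to a list of per-question marks, as displayMarksTable() expects.
    return jsonify({"message": {"student_1": [7.5, 9.0]}})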