yamanavijayavardhan committed on
Commit 26f855a · 1 Parent(s): 13cd7b4

update_new_new

HTR/app.py CHANGED
@@ -1,4 +1,6 @@
 import cv2
+import os
+import tempfile
 
 from HTR.word import convert_image
 from HTR.strike import struck_images
@@ -7,17 +9,25 @@ from HTR.spell_and_gramer_check import spell_grammer
 
 # Define a function to extract text from an image
 def extract_text_from_image(img_path):
-    img = cv2.imread(img_path)
-    # print(img)
-    imgs = convert_image(img)
-    images_path = struck_images(imgs)
-    t = text(images_path)
-    # print("\n\n\n\n\n\n\n")
-    # print(t)
-    t = spell_grammer(t)
-    # t = text
-    # print("\n\n\n\n\n\n\n")
-    # print(t)
-    return t
+    try:
+        # Ensure the image exists
+        if not os.path.exists(img_path):
+            raise FileNotFoundError(f"Image file not found: {img_path}")
+
+        # Read the image
+        img = cv2.imread(img_path)
+        if img is None:
+            raise ValueError(f"Failed to read image: {img_path}")
+
+        # Process the image
+        imgs = convert_image(img)
+        images_path = struck_images(imgs)
+        t = text(images_path)
+        t = spell_grammer(t)
+
+        return t
+    except Exception as e:
+        print(f"Error in extract_text_from_image: {str(e)}")
+        return ""
 
 # extract_text_from_image("ans_image/1.jpg")
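With the new guards, failures surface as an empty string instead of an exception, so callers must treat "" as the failure signal. A minimal usage sketch, reusing the commented-out example path from the file:

    from HTR.app import extract_text_from_image

    text_out = extract_text_from_image("ans_image/1.jpg")
    if not text_out:
        print("extraction failed or produced no text")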
HTR/hcr.py CHANGED
@@ -1,27 +1,53 @@
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
 import cv2
+import os
+import torch
 
+# Initialize model and processor globally
 MODEL_NAME = "microsoft/trocr-large-handwritten"
-processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
-model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
+processor = None
+model = None
+
+def initialize_model():
+    global processor, model
+    if processor is None or model is None:
+        try:
+            processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
+            model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
+            if torch.cuda.is_available():
+                model = model.to('cuda')
+        except Exception as e:
+            print(f"Error initializing model: {str(e)}")
+            raise
 
 def text(image_cv):
-    t = ""
-    for i in image_cv:
-        img_rgb = cv2.cvtColor(i, cv2.COLOR_BGR2RGB)
-        image = Image.fromarray(img_rgb)
-
-        # image = Image.open(i).convert("RGB")
-        pixel_values = processor(image, return_tensors="pt").pixel_values
-        generated_ids = model.generate(pixel_values)
-
-        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        t = t+generated_text.replace(" ", "")+ " "
-
-    # print(t)
-
-    return t
+    try:
+        # Initialize model if not already done
+        initialize_model()
+
+        t = ""
+        for i in image_cv:
+            try:
+                # Convert BGR to RGB
+                img_rgb = cv2.cvtColor(i, cv2.COLOR_BGR2RGB)
+                image = Image.fromarray(img_rgb)
+
+                # Process image
+                pixel_values = processor(image, return_tensors="pt").pixel_values
+                if torch.cuda.is_available():
+                    pixel_values = pixel_values.to('cuda')
+
+                generated_ids = model.generate(pixel_values)
+                generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+                t = t + generated_text.replace(" ", "") + " "
+
+            except Exception as e:
+                print(f"Error processing image: {str(e)}")
+                continue
+
+        return t.strip()
+    except Exception as e:
+        print(f"Error in text function: {str(e)}")
+        return ""
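Each crop is decoded independently, and the replace(" ", "") call strips any spaces TrOCR emits within a crop, so word boundaries in the final string come only from the single space appended per crop. For reference, a minimal standalone TrOCR call on one image; the model name matches the diff, the file path is illustrative:

    from transformers import TrOCRProcessor, VisionEncoderDecoderModel
    from PIL import Image

    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-handwritten")
    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-handwritten")

    image = Image.open("word_crop.png").convert("RGB")  # illustrative input
    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])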
 
HTR/spell_and_gramer_check.py CHANGED
@@ -1,22 +1,43 @@
 from spellchecker import SpellChecker
+import os
 
-spell = SpellChecker()
+# Initialize spell checker globally
+spell = None
+
+def initialize_spell_checker():
+    global spell
+    if spell is None:
+        try:
+            spell = SpellChecker()
+        except Exception as e:
+            print(f"Error initializing spell checker: {str(e)}")
+            raise
 
 def spell_grammer(text):
-    # Split text into words
-    words = text.split()
-
-    # Find misspelled words
-    misspelled = spell.unknown(words)
-
-    # Correct misspelled words
-    corrected_words = []
-    for word in words:
-        if word in misspelled:
-            corrected_words.append(spell.correction(word))
-        else:
-            corrected_words.append(word)
-
-    # Join words back into text
-    corrected_text = ' '.join(corrected_words)
-    return corrected_text
+    try:
+        # Initialize spell checker if not already done
+        initialize_spell_checker()
+
+        if not text or not isinstance(text, str):
+            return ""
+
+        # Split text into words
+        words = text.split()
+
+        # Find misspelled words
+        misspelled = spell.unknown(words)
+
+        # Correct misspelled words
+        corrected_words = []
+        for word in words:
+            if word in misspelled:
+                corrected_words.append(spell.correction(word))
+            else:
+                corrected_words.append(word)
+
+        # Join words back into text
+        corrected_text = ' '.join(corrected_words)
+        return corrected_text.strip()
+    except Exception as e:
+        print(f"Error in spell_grammer: {str(e)}")
+        return text
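One edge case the new guards do not cover: in recent pyspellchecker releases, spell.correction(word) returns None when it has no candidate, which would splice "None" into the joined text. A small extra guard, sketched here rather than taken from the commit:

    corrected = spell.correction(word)
    corrected_words.append(corrected if corrected is not None else word)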
HTR/strike.py CHANGED
@@ -5,40 +5,72 @@ import os
 import cv2
 from transformers import AutoModelForImageClassification
 
-def image_preprocessing(image):
-    images=[]
-    for i in image:
-        binary_image = i
-        binary_image = cv2.resize(binary_image, (224, 224))
-        binary_image = cv2.merge([binary_image, binary_image, binary_image])
-        binary_image = binary_image/255
-        binary_image = torch.from_numpy(binary_image)
-        images.append(binary_image)
-    return images
+# Initialize model globally
+model = None
 
-def predict_image(image_path, model):
-    preprocessed_img = image_preprocessing(image_path)
-    images = torch.stack(preprocessed_img)
-    images = images.permute(0, 3, 1, 2)
-    predictions = model(images).logits.detach().numpy()
-    return predictions
+def initialize_model():
+    global model
+    if model is None:
+        try:
+            model = AutoModelForImageClassification.from_pretrained("models/vit-base-beans")
+            if torch.cuda.is_available():
+                model = model.to('cuda')
+        except Exception as e:
+            print(f"Error initializing model: {str(e)}")
+            raise
 
-model = AutoModelForImageClassification.from_pretrained("models/vit-base-beans")
+def image_preprocessing(image):
+    try:
+        images = []
+        for i in image:
+            binary_image = i
+            binary_image = cv2.resize(binary_image, (224, 224))
+            binary_image = cv2.merge([binary_image, binary_image, binary_image])
+            binary_image = binary_image/255
+            binary_image = torch.from_numpy(binary_image)
+            images.append(binary_image)
+        return images
+    except Exception as e:
+        print(f"Error in image_preprocessing: {str(e)}")
+        return []
+
+def predict_image(image_path, model):
+    try:
+        preprocessed_img = image_preprocessing(image_path)
+        if not preprocessed_img:
+            return None
+
+        images = torch.stack(preprocessed_img)
+        images = images.permute(0, 3, 1, 2)
+
+        if torch.cuda.is_available():
+            images = images.to('cuda')
+
+        with torch.no_grad():
+            predictions = model(images).logits.detach().cpu().numpy()
+        return predictions
+    except Exception as e:
+        print(f"Error in predict_image: {str(e)}")
+        return None
 
 def struck_images(word__image):
-
-    predictions = predict_image(word__image, model)
-
-    not_struck =[]
-    for i in range(len(predictions)):
-        if predictions[i].argmax().item() == 0:
-
-            not_struck.append(word__image[i])
-
-    # print(not_struck)
-    return not_struck
+    try:
+        # Initialize model if not already done
+        initialize_model()
+
+        predictions = predict_image(word__image, model)
+        if predictions is None:
+            return []
+
+        not_struck = []
+        for i in range(len(predictions)):
+            if predictions[i].argmax().item() == 0:
+                not_struck.append(word__image[i])
+
+        return not_struck
+    except Exception as e:
+        print(f"Error in struck_images: {str(e)}")
+        return word__image  # Return original images if error occurs
 
 
 # struck_images()
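A dtype detail to note: for a uint8 crop, binary_image/255 produces float64, and torch.from_numpy keeps that dtype, so the stacked batch reaches the classifier ("models/vit-base-beans", a local checkpoint) in double precision, which typically fails with a dtype mismatch against float32 weights. An explicit cast inside image_preprocessing avoids this; a sketch, not part of the commit:

    import numpy as np

    binary_image = (binary_image / 255.0).astype(np.float32)  # cast before torch.from_numpy
    binary_image = torch.from_numpy(binary_image)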
HTR/word.py CHANGED
@@ -3,6 +3,7 @@ import cv2
 # import matplotlib.pyplot as plt
 import sys
 import os
+import tempfile
 
 
 cordinates =[]
@@ -10,95 +11,107 @@ cordinates =[]
 
 
 def four_point_transform(image, pts):
-    rect = pts
-    (tl, tr, br, bl) = rect
-
-    # Compute the width of the new image
-    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
-    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
-    maxWidth = max(int(widthA), int(widthB))
-
-    # Compute the height of the new image
-    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
-    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
-    maxHeight = max(int(heightA), int(heightB))
-
-    dst = np.array([
-        [0, 0],
-        [maxWidth - 1, 0],
-        [maxWidth - 1, maxHeight - 1],
-        [0, maxHeight - 1]], dtype="float32")
-
-    rect = np.array(rect, dtype="float32")
-
-    M = cv2.getPerspectiveTransform(rect, dst)
-    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
-
-    return warped
+    try:
+        rect = pts
+        (tl, tr, br, bl) = rect
+
+        # Compute the width of the new image
+        widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+        widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+        maxWidth = max(int(widthA), int(widthB))
+
+        # Compute the height of the new image
+        heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+        heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+        maxHeight = max(int(heightA), int(heightB))
+
+        dst = np.array([
+            [0, 0],
+            [maxWidth - 1, 0],
+            [maxWidth - 1, maxHeight - 1],
+            [0, maxHeight - 1]], dtype="float32")
+
+        rect = np.array(rect, dtype="float32")
+
+        M = cv2.getPerspectiveTransform(rect, dst)
+        warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
+
+        return warped
+    except Exception as e:
+        print(f"Error in four_point_transform: {str(e)}")
+        return image
 
 
 def remove_shadow(image):
-    rgb_planes = cv2.split(image)
-
-    result_planes = []
-    result_norm_planes = []
-    for plane in rgb_planes:
-        dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
-        bg_img = cv2.medianBlur(dilated_img, 21)
-        diff_img = 255 - cv2.absdiff(plane, bg_img)
-        norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
-        result_planes.append(diff_img)
-        result_norm_planes.append(norm_img)
-
-    result = cv2.merge(result_planes)
-    result_norm = cv2.merge(result_norm_planes)
-
-    return result,result_norm
+    try:
+        rgb_planes = cv2.split(image)
+
+        result_planes = []
+        result_norm_planes = []
+        for plane in rgb_planes:
+            dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
+            bg_img = cv2.medianBlur(dilated_img, 21)
+            diff_img = 255 - cv2.absdiff(plane, bg_img)
+            norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
+            result_planes.append(diff_img)
+            result_norm_planes.append(norm_img)
+
+        result = cv2.merge(result_planes)
+        result_norm = cv2.merge(result_norm_planes)
+
+        return result, result_norm
+    except Exception as e:
+        print(f"Error in remove_shadow: {str(e)}")
+        return image, image
 
 
 def analise(image):
    global line, binary_image1, x_scaling , y_scaling
-    kernel = np.ones((1,250),np.uint8)
-
-    dilation = cv2.dilate(image, kernel, iterations = 2)
-
-    # cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
-    # cv2.imshow('Image',dilation)
-    # cv2.waitKey(0)
-
-    contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-    for i in reversed(contours):
-        x, y, w, h = cv2.boundingRect(i)
-        if cv2.contourArea(i)<20 :
-            continue
-        elif h < 8:
-            continue
-        else:
-            scaling_factor_in_y = 0.5
-            scaling_factor_in_x = 0
-            resized_contour = i.copy()
-
-            resized_contour = i * [x_scaling, y_scaling]
-
-            resized_contour = resized_contour.astype(int)
-            final_image__ = np.zeros_like(binary_image1)
-            cv2.drawContours(final_image__, [resized_contour], 0, (255), -1)
-
-            kernel_dil = np.ones((3,3),np.uint8)
-            final_image__ = cv2.dilate(final_image__,kernel_dil,iterations = 3)
-
-            line_image_final = cv2.bitwise_and(final_image__, binary_image1)
-            line.append(line_image_final)
-            # cv2.namedWindow("Line image", cv2.WINDOW_NORMAL)
-            # cv2.imshow('Line image',line_image_final)
-            # cv2.waitKey(0)
-
+    try:
+        kernel = np.ones((1,250),np.uint8)
+
+        dilation = cv2.dilate(image, kernel, iterations = 2)
+
+        # cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
+        # cv2.imshow('Image',dilation)
+        # cv2.waitKey(0)
+
+        contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        for i in reversed(contours):
+            x, y, w, h = cv2.boundingRect(i)
+            if cv2.contourArea(i)<20 :
+                continue
+            elif h < 8:
+                continue
+            else:
+                scaling_factor_in_y = 0.5
+                scaling_factor_in_x = 0
+                resized_contour = i.copy()
+
+                resized_contour = i * [x_scaling, y_scaling]
+
+                resized_contour = resized_contour.astype(int)
+                final_image__ = np.zeros_like(binary_image1)
+                cv2.drawContours(final_image__, [resized_contour], 0, (255), -1)
+
+                kernel_dil = np.ones((3,3),np.uint8)
+                final_image__ = cv2.dilate(final_image__,kernel_dil,iterations = 3)
+
+                line_image_final = cv2.bitwise_and(final_image__, binary_image1)
+                line.append(line_image_final)
+                # cv2.namedWindow("Line image", cv2.WINDOW_NORMAL)
+                # cv2.imshow('Line image',line_image_final)
+                # cv2.waitKey(0)
+
+    except Exception as e:
+        print(f"Error in analise: {str(e)}")
 
 def image_resize_and_errosion(image):
 
    height, width = image.shape[:2]
@@ -122,163 +135,168 @@ line_length = 0
 count = 0
 
 def convert_image(img):
-    folder_path = 'images'
-
-    for filename in os.listdir(folder_path):
-        file_path = os.path.join(folder_path, filename)
-        try:
-            if os.path.isfile(file_path):
-                os.remove(file_path)
-        except Exception as e:
-            print(f"Error deleting file {file_path}: {e}")
-
-    global x_scaling,y_scaling,binary_image1,line,line_lenght,count
-    # img = cv2.imread(image_file)
-    img_copy = np.copy(img)
-    line_lenght = 250
-    rect_image = img
-
-    # removing the shadow in the image
-    image1, image2_ = remove_shadow(rect_image)
-
-    # converting into grayscale
-    gray_ = cv2.cvtColor(image2_,cv2.COLOR_BGR2GRAY)
-
-    # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
-    # cv2.imshow('grayscale image',gray_)
-    # cv2.waitKey(0)
-
-    # convrting into binaryimage
-    _, binary_image_ = cv2.threshold(gray_, 200, 255, cv2.THRESH_BINARY)
-    # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
-    # cv2.imshow('binary image',binary_image_)
-    # cv2.waitKey(0)
-
-    inverted_binary_image_ = 255 - binary_image_
-
-    binary_image1 = np.copy(inverted_binary_image_)
-
-    y_height ,x_width= rect_image.shape[:2]
-
-    # print("image width, height =", x_width, y_height)
-
-    # resizing the image
-    new_width = 500*5
-    new_height = 705*5
-
-    x_scaling = x_width/new_width
-    y_scaling = y_height/new_height
-
-    # print("After resizing width, height", new_width , new_height)
-    rect_image = cv2.resize(rect_image, (new_width, new_height), interpolation=cv2.INTER_NEAREST)
-    # cv2.namedWindow("resized image", cv2.WINDOW_NORMAL)
-    # cv2.imshow('resized image',rect_image)
-    # cv2.waitKey(0)
-
-    # removing the shadow in the image
-    image1, image2 = remove_shadow(rect_image)
-
-    # converting into grayscale
-    gray = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
-    # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
-    # cv2.imshow('grayscale image',gray)
-    # cv2.waitKey(0)
-
-    # convrting into binaryimage
-    _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
-    _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
-    # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
-    # cv2.imshow('binary image',gray)
-    # cv2.waitKey(0)
-
-    # inverting the pixel
-    inverted_binary_image = 255 - binary_image
-
-    kernel = np.ones((2,2),np.uint8)
-
-    # performing erosion to remove noise
-    erosion = cv2.erode(inverted_binary_image,kernel,iterations = 1)
-    # cv2.namedWindow("erosion", cv2.WINDOW_NORMAL)
-    # cv2.imshow('erosion',erosion)
-    # cv2.waitKey(0)
-
-    # performing Dilution operatiom
-    dilation = cv2.dilate(erosion,kernel,iterations = 1)
-    # cv2.namedWindow("dilation", cv2.WINDOW_NORMAL)
-    # cv2.imshow('dilation',erosion)
-    # cv2.waitKey(0)
-
-    new_image = np.copy(dilation)
-    new_image = 255 - new_image
-
-    # defining kernal size
-    kernel = np.ones((1,250),np.uint8)
-
-    # performing Dilution operatiom
-    dilation_1 = cv2.dilate(dilation,kernel,iterations = 2)
-    # cv2.namedWindow("dilation_1", cv2.WINDOW_NORMAL)
-    # cv2.imshow('dilation_1',dilation_1)
-    # cv2.waitKey(0)
-
-    contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-    line = []
-    # line saparation
-    for i in reversed(contours):
-        x, y, w, h = cv2.boundingRect(i)
-        if cv2.contourArea(i)<20:
-            continue
-        elif h < 10:
-            continue
-        else:
-            cv2.drawContours(new_image, [i],-1,(0),2)
-            final_image_ = np.zeros_like(binary_image)
-            cv2.drawContours(final_image_, [i], 0, (255), -1)
-
-            # cv2.namedWindow("final_image_", cv2.WINDOW_NORMAL)
-            # cv2.imshow('final_image_',final_image_)
-            # cv2.waitKey(0)
-
-            line_image = cv2.bitwise_and(final_image_, dilation)
-            # cv2.namedWindow("line_image", cv2.WINDOW_NORMAL)
-            # cv2.imshow('line_image',line_image)
-            # cv2.waitKey(0)
-
-            analise(line_image)
-
-    count = 0
-    kernel1 = np.ones((8,8),np.uint8)
-    word__image = []  # newly added
-    for line_image in line:
-
-        dilation_2 = cv2.dilate(line_image,kernel1,iterations = 2)
-
-        contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-        sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
-
-        for j in sorted_contours:
-            x1,y1,w1,h1 = cv2.boundingRect(j)
-            final_image = line_image[y1:y1+h1,x1:x1+w1]
-            image_name ="images/"+str(count)+".png"
-            final_image = 255 - final_image
-            word__image.append(final_image)  # newly added
-            # cv2.imwrite(image_name, final_image)
-            count=count+1
-
-    # cv2.waitKey(0)
-    # cv2.destroyAllWindows()
-    return word__image
+    try:
+        folder_path = 'images'
+
+        for filename in os.listdir(folder_path):
+            file_path = os.path.join(folder_path, filename)
+            try:
+                if os.path.isfile(file_path):
+                    os.remove(file_path)
+            except Exception as e:
+                print(f"Error deleting file {file_path}: {e}")
+
+        global x_scaling,y_scaling,binary_image1,line,line_lenght,count
+        # img = cv2.imread(image_file)
+        img_copy = np.copy(img)
+        line_lenght = 250
+        rect_image = img
+
+        # removing the shadow in the image
+        image1, image2_ = remove_shadow(rect_image)
+
+        # converting into grayscale
+        gray_ = cv2.cvtColor(image2_,cv2.COLOR_BGR2GRAY)
+
+        # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
+        # cv2.imshow('grayscale image',gray_)
+        # cv2.waitKey(0)
+
+        # converting into binary image
+        _, binary_image_ = cv2.threshold(gray_, 200, 255, cv2.THRESH_BINARY)
+        # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
+        # cv2.imshow('binary image',binary_image_)
+        # cv2.waitKey(0)
+
+        inverted_binary_image_ = 255 - binary_image_
+
+        binary_image1 = np.copy(inverted_binary_image_)
+
+        y_height ,x_width= rect_image.shape[:2]
+
+        # print("image width, height =", x_width, y_height)
+
+        # resizing the image
+        new_width = 500*5
+        new_height = 705*5
+
+        x_scaling = x_width/new_width
+        y_scaling = y_height/new_height
+
+        # print("After resizing width, height", new_width , new_height)
+        rect_image = cv2.resize(rect_image, (new_width, new_height), interpolation=cv2.INTER_NEAREST)
+        # cv2.namedWindow("resized image", cv2.WINDOW_NORMAL)
+        # cv2.imshow('resized image',rect_image)
+        # cv2.waitKey(0)
+
+        # removing the shadow in the image
+        image1, image2 = remove_shadow(rect_image)
+
+        # converting into grayscale
+        gray = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
+        # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
+        # cv2.imshow('grayscale image',gray)
+        # cv2.waitKey(0)
+
+        # converting into binary image
+        _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+        # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
+        # cv2.imshow('binary image',gray)
+        # cv2.waitKey(0)
+
+        # inverting the pixel
+        inverted_binary_image = 255 - binary_image
+
+        kernel = np.ones((2,2),np.uint8)
+
+        # performing erosion to remove noise
+        erosion = cv2.erode(inverted_binary_image,kernel,iterations = 1)
+        # cv2.namedWindow("erosion", cv2.WINDOW_NORMAL)
+        # cv2.imshow('erosion',erosion)
+        # cv2.waitKey(0)
+
+        # performing dilation operation
+        dilation = cv2.dilate(erosion,kernel,iterations = 1)
+        # cv2.namedWindow("dilation", cv2.WINDOW_NORMAL)
+        # cv2.imshow('dilation',erosion)
+        # cv2.waitKey(0)
+
+        new_image = np.copy(dilation)
+        new_image = 255 - new_image
+
+        # defining kernel size
+        kernel = np.ones((1,250),np.uint8)
+
+        # performing dilation operation
+        dilation_1 = cv2.dilate(dilation,kernel,iterations = 2)
+        # cv2.namedWindow("dilation_1", cv2.WINDOW_NORMAL)
+        # cv2.imshow('dilation_1',dilation_1)
+        # cv2.waitKey(0)
+
+        contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+        line = []
+        # line separation
+        for i in reversed(contours):
+            x, y, w, h = cv2.boundingRect(i)
+            if cv2.contourArea(i)<20:
+                continue
+            elif h < 10:
+                continue
+            else:
+                cv2.drawContours(new_image, [i],-1,(0),2)
+                final_image_ = np.zeros_like(binary_image)
+                cv2.drawContours(final_image_, [i], 0, (255), -1)
+
+                # cv2.namedWindow("final_image_", cv2.WINDOW_NORMAL)
+                # cv2.imshow('final_image_',final_image_)
+                # cv2.waitKey(0)
+
+                line_image = cv2.bitwise_and(final_image_, dilation)
+                # cv2.namedWindow("line_image", cv2.WINDOW_NORMAL)
+                # cv2.imshow('line_image',line_image)
+                # cv2.waitKey(0)
+
+                analise(line_image)
+
+        count = 0
+        kernel1 = np.ones((8,8),np.uint8)
+        word__image = []  # newly added
+        for line_image in line:
+
+            dilation_2 = cv2.dilate(line_image,kernel1,iterations = 2)
+
+            contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+            sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
+
+            for j in sorted_contours:
+                x1,y1,w1,h1 = cv2.boundingRect(j)
+                final_image = line_image[y1:y1+h1,x1:x1+w1]
+                image_name ="images/"+str(count)+".png"
+                final_image = 255 - final_image
+                word__image.append(final_image)  # newly added
+                # cv2.imwrite(image_name, final_image)
+                count=count+1
+
+        # cv2.waitKey(0)
+        # cv2.destroyAllWindows()
+        return word__image
+
+    except Exception as e:
+        print(f"Error in convert_image: {str(e)}")
+        return []
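The whole segmentation rests on one morphological idea: dilating the inverted binary page with a very wide 1x250 kernel smears characters together horizontally, so each text line becomes one connected contour, and repeating the trick per line with a smaller 8x8 kernel merges characters into words. A minimal self-contained illustration of the line step; kernel sizes follow the diff, the input path is illustrative:

    import cv2
    import numpy as np

    page = cv2.imread("page.png", cv2.IMREAD_GRAYSCALE)  # illustrative path
    _, binary = cv2.threshold(page, 200, 255, cv2.THRESH_BINARY_INV)

    # A wide kernel merges characters on the same line into one blob per line
    blobs = cv2.dilate(binary, np.ones((1, 250), np.uint8), iterations=2)
    contours, _ = cv2.findContours(blobs, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    print(f"found {len(contours)} line candidates")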
 
 
 
 
 
 
 
 
 
 
all_models.py CHANGED
@@ -1,5 +1,13 @@
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+import torch
+import os
+import tempfile
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
 
 class ModelSingleton:
     _instance = None
@@ -12,17 +20,74 @@ class ModelSingleton:
 
     def __init__(self):
         if not self._initialized:
-            # Sentence transformer model
-            SENTENCE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-            self.similarity_tokenizer = AutoTokenizer.from_pretrained(SENTENCE_MODEL)
-            self.similarity_model = SentenceTransformer(SENTENCE_MODEL)
-
-            # Flan-T5-xl model only
-            FLAN_MODEL = "google/flan-t5-xl"
-            self.flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_MODEL)
-            self.flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_MODEL)
-
-            self._initialized = True
+            try:
+                # Set cache directory to temporary directory
+                cache_dir = os.getenv('TRANSFORMERS_CACHE', tempfile.gettempdir())
+                os.environ['TRANSFORMERS_CACHE'] = cache_dir
+
+                # Get device
+                self.device = "cuda" if torch.cuda.is_available() else "cpu"
+                logger.info(f"Using device: {self.device}")
+
+                # Sentence transformer model
+                try:
+                    logger.info("Loading sentence transformer model...")
+                    SENTENCE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+                    self.similarity_tokenizer = AutoTokenizer.from_pretrained(
+                        SENTENCE_MODEL,
+                        cache_dir=cache_dir
+                    )
+                    self.similarity_model = SentenceTransformer(
+                        SENTENCE_MODEL,
+                        cache_folder=cache_dir
+                    )
+                    self.similarity_model.to(self.device)
+                    logger.info("Sentence transformer model loaded successfully")
+                except Exception as e:
+                    logger.error(f"Error loading sentence transformer model: {e}")
+                    raise
+
+                # Flan-T5-xl model
+                try:
+                    logger.info("Loading Flan-T5 model...")
+                    FLAN_MODEL = "google/flan-t5-xl"
+                    self.flan_tokenizer = AutoTokenizer.from_pretrained(
+                        FLAN_MODEL,
+                        cache_dir=cache_dir
+                    )
+                    self.flan_model = AutoModelForSeq2SeqLM.from_pretrained(
+                        FLAN_MODEL,
+                        cache_dir=cache_dir,
+                        torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
+                    )
+                    self.flan_model.to(self.device)
+                    logger.info("Flan-T5 model loaded successfully")
+                except Exception as e:
+                    logger.error(f"Error loading Flan-T5 model: {e}")
+                    raise
+
+                self._initialized = True
+                logger.info("All models initialized successfully")
+
+            except Exception as e:
+                logger.error(f"Error during model initialization: {e}")
+                raise
+
+    def cleanup(self):
+        """Clean up model resources"""
+        try:
+            if hasattr(self, 'similarity_model'):
+                del self.similarity_model
+            if hasattr(self, 'flan_model'):
+                del self.flan_model
+            torch.cuda.empty_cache()
+            logger.info("Model resources cleaned up successfully")
+        except Exception as e:
+            logger.error(f"Error during cleanup: {e}")
 
 # Create a global instance
-models = ModelSingleton()
+models = ModelSingleton()
+
+# Add cleanup function to the global instance
+def cleanup_models():
+    models.cleanup()
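cleanup_models is defined but never registered anywhere in the diff, so it only runs if a caller invokes it explicitly. Hooking it to interpreter shutdown is one option; a sketch, assuming all_models imports as shown above:

    import atexit
    from all_models import cleanup_models

    atexit.register(cleanup_models)  # release model memory at process exit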
main.py CHANGED
@@ -291,8 +291,9 @@ def compute_marks():
     student_folder = path_parts[-2]  # Get the parent folder name
     filename = path_parts[-1]  # Get the actual filename
 
-    # Save directly to the images directory with a unique name
-    htr_filename = f"{student_folder}_{filename}"
+    # Create a clean filename without any path separators
+    clean_filename = filename.replace('/', '_').replace('\\', '_')
+    htr_filename = f"{student_folder}_{clean_filename}"
     htr_filepath = os.path.join(images_dir, htr_filename)
 
     # Save the file with full permissions
@@ -304,10 +305,10 @@ def compute_marks():
 
     log_print(f"Saved file: {htr_filepath}")
 
-    # Add to data structure
+    # Add to data structure with absolute path
     if student_folder not in data:
         data[student_folder] = []
-    data[student_folder].append(htr_filepath)
+    data[student_folder].append(os.path.abspath(htr_filepath))
 
     if not data:
         log_print("No valid image files were found in the upload", "ERROR")
@@ -334,8 +335,20 @@ def compute_marks():
     count = 0
     for image_path in file_paths:
         try:
+            log_print(f"\nProcessing image: {image_path}")
+            log_print(f"Checking if file exists: {os.path.exists(image_path)}")
+
+            # Ensure the image path is absolute and exists
+            if not os.path.isabs(image_path):
+                image_path = os.path.abspath(image_path)
+
+            if not os.path.exists(image_path):
+                log_print(f"Error: Image file not found at {image_path}", "ERROR")
+                s_marks[student_folder].append(0)
+                continue
+
+            # Extract text from image
             s_answer = extract_text_from_image(image_path)
-            log_print(f"\nProcessing {student_folder}/{os.path.basename(image_path)}:")
             log_print(f"Extracted answer: {s_answer}")
 
             if s_answer and count < len(answers):
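Replacing '/' and '\\' by hand covers the common separators. If Werkzeug is available (it ships with Flask; the diff does not show which framework main.py uses), secure_filename is a stricter, well-tested sanitizer; an alternative sketch only:

    from werkzeug.utils import secure_filename

    clean_filename = secure_filename(filename)
    htr_filename = f"{student_folder}_{clean_filename}"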
similarity_check/llm_based_scoring/llm.py CHANGED
@@ -9,68 +9,84 @@ from all_models import models
 # model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
 # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 
+# Get device and ensure model is on correct device
 device = "cuda" if torch.cuda.is_available() else "cpu"
-models.flan_model.to(device)
+try:
+    models.flan_model.to(device)
+except Exception as e:
+    print(f"Warning: Could not move model to device {device}: {e}")
 
 def llm_score(correct_answers, answer):
-    score = []
-
-    for correct_answer in correct_answers:
-        print(correct_answer)
-        print(answer)
-        print()
-        print()
-        prompt = (
-            "You are an expert evaluator of answers. Your response must be a *single numeric score (0-10), not a range.*\n\n"
-
-            "The user's answer has been converted from handwriting using OCR, so minor spelling, punctuation, or small word variations may exist. "
-            "Focus on meaning rather than transcription errors.\n\n"
-
-            "### Evaluation Criteria:\n"
-            "- *Correctness (90% weight):* Does the answer accurately convey the meaning of the correct answer?\n"
-            "- *Completeness (10% weight):* Does it cover all key points?\n\n"
-
-            "### Handling OCR Errors:\n"
-            "- Ignore minor spelling/punctuation mistakes that don't affect meaning.\n"
-            "- Penalize only if word substitutions change the meaning.\n\n"
-
-            "### Scoring Guidelines:\n"
-            "- *10:* Fully correct and complete (90-100% accurate).\n"
-            "- *From 9 to 8:* Mostly correct, minor missing details (80-90% accurate).\n"
-            "- *From 7 to 6:* Good but missing some key points (60-80% accurate).\n"
-            "- *From 5 to 4:* Average, with several omissions/errors (40-60% accurate).\n"
-            "- *From 3 to 2:* Poor, major meaning errors (20-40% accurate).\n"
-            "- *From 1 to 0:* Incorrect or irrelevant (less than 20% accurate).\n\n"
-
-            "Compare the answers and assign a *single numeric score (0-10)* based on correctness and completeness.\n\n"
-
-            "Correct answer:\n"
-            f"{correct_answer}\n\n"
-            "User's answer:\n"
-            f"{answer}\n\n"
-            "Final Score (numeric only, strictly between 0 and 10):")
-
-        # Tokenize input prompt
-        inputs = models.flan_tokenizer(prompt, return_tensors="pt").to(device)
-
-        # Generate response
-        with torch.no_grad():
-            outputs = models.flan_model.generate(
-                **inputs,
-                max_length=2048,
-                do_sample=True,
-                num_return_sequences=1,
-                num_beams=5,
-                temperature=0.6,
-                top_p=0.9,
-                early_stopping=True,
-                pad_token_id=models.flan_tokenizer.pad_token_id,
-                eos_token_id=models.flan_tokenizer.eos_token_id,
-                bos_token_id=models.flan_tokenizer.bos_token_id,
-            )
-
-        # Decode and print response
-        print(models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True))
-        score.append(models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True))
-
-    return score
+    try:
+        score = []
+
+        for correct_answer in correct_answers:
+            try:
+                prompt = (
+                    "You are an expert evaluator of answers. Your response must be a *single numeric score (0-10), not a range.*\n\n"
+
+                    "The user's answer has been converted from handwriting using OCR, so minor spelling, punctuation, or small word variations may exist. "
+                    "Focus on meaning rather than transcription errors.\n\n"
+
+                    "### Evaluation Criteria:\n"
+                    "- *Correctness (90% weight):* Does the answer accurately convey the meaning of the correct answer?\n"
+                    "- *Completeness (10% weight):* Does it cover all key points?\n\n"
+
+                    "### Handling OCR Errors:\n"
+                    "- Ignore minor spelling/punctuation mistakes that don't affect meaning.\n"
+                    "- Penalize only if word substitutions change the meaning.\n\n"
+
+                    "### Scoring Guidelines:\n"
+                    "- *10:* Fully correct and complete (90-100% accurate).\n"
+                    "- *From 9 to 8:* Mostly correct, minor missing details (80-90% accurate).\n"
+                    "- *From 7 to 6:* Good but missing some key points (60-80% accurate).\n"
+                    "- *From 5 to 4:* Average, with several omissions/errors (40-60% accurate).\n"
+                    "- *From 3 to 2:* Poor, major meaning errors (20-40% accurate).\n"
+                    "- *From 1 to 0:* Incorrect or irrelevant (less than 20% accurate).\n\n"
+
+                    "Compare the answers and assign a *single numeric score (0-10)* based on correctness and completeness.\n\n"
+
+                    "Correct answer:\n"
+                    f"{correct_answer}\n\n"
+                    "User's answer:\n"
+                    f"{answer}\n\n"
+                    "Final Score (numeric only, strictly between 0 and 10):")
+
+                # Tokenize input prompt
+                inputs = models.flan_tokenizer(prompt, return_tensors="pt").to(device)
+
+                # Generate response
+                with torch.no_grad():
+                    outputs = models.flan_model.generate(
+                        **inputs,
+                        max_length=2048,
+                        do_sample=True,
+                        num_return_sequences=1,
+                        num_beams=5,
+                        temperature=0.6,
+                        top_p=0.9,
+                        early_stopping=True,
+                        pad_token_id=models.flan_tokenizer.pad_token_id,
+                        eos_token_id=models.flan_tokenizer.eos_token_id,
+                        bos_token_id=models.flan_tokenizer.bos_token_id,
+                    )
+
+                # Decode and clean response
+                response = models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True)
+                try:
+                    # Extract numeric score from response
+                    score_value = float(''.join(filter(str.isdigit, response)))
+                    score_value = max(0, min(10, score_value))  # Clamp between 0 and 10
+                    score.append(score_value)
+                except ValueError:
+                    print(f"Warning: Could not extract numeric score from response: {response}")
+                    score.append(0)
+
+            except Exception as e:
+                print(f"Error processing answer: {str(e)}")
+                score.append(0)
+
+        return score
+    except Exception as e:
+        print(f"Error in llm_score: {str(e)}")
+        return [0] * len(correct_answers)
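A caveat in the new score parsing: ''.join(filter(str.isdigit, response)) concatenates every digit in the reply, so "8.5" becomes 85 and "7 out of 10" becomes 710 before the clamp flattens both to 10. Taking only the first number with a regex is more faithful; a sketch, not part of the commit:

    import re

    match = re.search(r"\d+(?:\.\d+)?", response)
    score_value = max(0.0, min(10.0, float(match.group()))) if match else 0.0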
similarity_check/semantic_meaning_check/semantic.py CHANGED
@@ -16,6 +16,7 @@ from all_models import models
 
 # Use custom directory for gensim data
 gensim_data_dir = os.getenv('GENSIM_DATA_DIR', tempfile.gettempdir())
+os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
 
 # Load fasttext with error handling
 try:
@@ -39,58 +40,89 @@ except Exception as e:
 # nltk.download('stopwords')
 
 def question_vector_sentence(correct_answer):
-    return models.similarity_model.encode(correct_answer, convert_to_tensor=True)
+    try:
+        return models.similarity_model.encode(correct_answer, convert_to_tensor=True)
+    except Exception as e:
+        print(f"Error in question_vector_sentence: {str(e)}")
+        return None
 
 def similarity_model_score(correct_answer_vector, answer):
-    answer_embedding = models.similarity_model.encode(answer, convert_to_tensor=True)
-    cosine_score = float('-inf')
-    for i in correct_answer_vector:
-        cosine_score = max(cosine_score, util.pytorch_cos_sim(i, answer_embedding))
-    return cosine_score
+    try:
+        if correct_answer_vector is None:
+            return 0.0
+
+        answer_embedding = models.similarity_model.encode(answer, convert_to_tensor=True)
+        cosine_score = float('-inf')
+        for i in correct_answer_vector:
+            cosine_score = max(cosine_score, util.pytorch_cos_sim(i, answer_embedding))
+        return float(cosine_score)  # Convert to float for JSON serialization
+    except Exception as e:
+        print(f"Error in similarity_model_score: {str(e)}")
+        return 0.0
 
 def preprocess(sentence):
-    # Lowercase and remove punctuation
-    sentence = sentence.lower()
-    # Tokenize
-    words = word_tokenize(sentence)
-    # Remove stop words
-    words = [word for word in words if word not in stopwords.words('english')]
-    return words
+    try:
+        # Lowercase and remove punctuation
+        sentence = sentence.lower()
+        # Tokenize
+        words = word_tokenize(sentence)
+        # Remove stop words
+        words = [word for word in words if word not in stopwords.words('english')]
+        return words
+    except Exception as e:
+        print(f"Error in preprocess: {str(e)}")
+        return []
 
 def sentence_to_vec(tokens, model):
-    # Filter words that are in the Word2Vec vocabulary
-    valid_words = [word for word in tokens if word in model]
-
-    # If there are no valid words, return a zero vector
-    if not valid_words:
-        return np.zeros(model.vector_size)
-
-    # Compute the average vector
-    word_vectors = [model[word] for word in valid_words]
-    sentence_vector = np.mean(word_vectors, axis=0)
-
-    return sentence_vector
+    try:
+        # Filter words that are in the Word2Vec vocabulary
+        valid_words = [word for word in tokens if word in model]
+
+        # If there are no valid words, return a zero vector
+        if not valid_words:
+            return np.zeros(model.vector_size)
+
+        # Compute the average vector
+        word_vectors = [model[word] for word in valid_words]
+        sentence_vector = np.mean(word_vectors, axis=0)
+
+        return sentence_vector
+    except Exception as e:
+        print(f"Error in sentence_to_vec: {str(e)}")
+        return np.zeros(300)  # Return zero vector as fallback
 
 def compute_scm(tokens1, tokens2, model):
-    dictionary = corpora.Dictionary([tokens1, tokens2])
-    tokens1 = dictionary.doc2bow(tokens1)
-    tokens2 = dictionary.doc2bow(tokens2)
-    termsim_index = WordEmbeddingSimilarityIndex(model)
-    termsim_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary)
-    similarity = termsim_matrix.inner_product(tokens1, tokens2, normalized=(True, True))
-    return similarity
+    try:
+        dictionary = corpora.Dictionary([tokens1, tokens2])
+        tokens1 = dictionary.doc2bow(tokens1)
+        tokens2 = dictionary.doc2bow(tokens2)
+        termsim_index = WordEmbeddingSimilarityIndex(model)
+        termsim_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary)
+        similarity = termsim_matrix.inner_product(tokens1, tokens2, normalized=(True, True))
+        return float(similarity)  # Convert to float for JSON serialization
+    except Exception as e:
+        print(f"Error in compute_scm: {str(e)}")
+        return 0.5  # Return default similarity score
 
 def question_vector_word(correct_answer):
-    return preprocess(correct_answer)
+    try:
+        return preprocess(correct_answer)
+    except Exception as e:
+        print(f"Error in question_vector_word: {str(e)}")
+        return []
 
 def fasttext_similarity(correct_answer_vector, answer):
-    preprocess_answer = preprocess(answer)
-    soft_cosine = float('-inf')
-
-    for i in correct_answer_vector:
-        soft_cosine = max(compute_scm(i, preprocess_answer, fasttext), soft_cosine)
-
-    return soft_cosine
+    try:
+        preprocess_answer = preprocess(answer)
+        soft_cosine = float('-inf')
+
+        for i in correct_answer_vector:
+            soft_cosine = max(compute_scm(i, preprocess_answer, fasttext), soft_cosine)
+
+        return float(soft_cosine)  # Convert to float for JSON serialization
+    except Exception as e:
+        print(f"Error in fasttext_similarity: {str(e)}")
+        return 0.0
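For reference, the sentence-level path reduces to one encode per answer plus a cosine; a minimal standalone check of that logic, with the model name taken from all_models.py and the sentences illustrative:

    from sentence_transformers import SentenceTransformer, util

    model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    ref = model.encode("Photosynthesis converts light into chemical energy", convert_to_tensor=True)
    ans = model.encode("Plants turn sunlight into energy", convert_to_tensor=True)
    print(float(util.pytorch_cos_sim(ref, ans)))  # cosine similarity in [-1, 1]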
similarity_check/tf_idf/tf_idf_score.py CHANGED
@@ -4,132 +4,155 @@ from nltk.tokenize import word_tokenize
 from nltk.corpus import wordnet
 from collections import Counter
 import string
+import os
+import tempfile
 
+# Set NLTK data path to temporary directory
+nltk_data_dir = os.getenv('NLTK_DATA', tempfile.gettempdir())
+os.environ['NLTK_DATA'] = nltk_data_dir
 
 def remove_stopwords(sentence):
-
-    # converting into words
-    words = word_tokenize(sentence)
-
-    # Get the set of English stop words
-    stop_words = set(stopwords.words('english'))
-
-    # Remove stop words from the list of words
-    filtered_words = [word for word in words if word.lower() not in stop_words]
-
-    words = [word.lower() for word in words if word.isalpha() and len(word)>1]
-
-    return words
+    try:
+        # converting into words
+        words = word_tokenize(sentence)
+
+        # Get the set of English stop words
+        stop_words = set(stopwords.words('english'))
+
+        # Remove stop words from the list of words
+        filtered_words = [word for word in words if word.lower() not in stop_words]
+
+        words = [word.lower() for word in words if word.isalpha() and len(word)>1]
+
+        return words
+    except Exception as e:
+        print(f"Error in remove_stopwords: {str(e)}")
+        return []
 
 def get_synonyms(word):
-    synonyms = set()
-    for syn in wordnet.synsets(word):
-        for lemma in syn.lemmas():
-            synonyms.add(lemma.name().lower())
-    return synonyms
+    try:
+        synonyms = set()
+        for syn in wordnet.synsets(word):
+            for lemma in syn.lemmas():
+                synonyms.add(lemma.name().lower())
+        return synonyms
+    except Exception as e:
+        print(f"Error in get_synonyms: {str(e)}")
+        return {word.lower()}
 
 def process_sentence(words):
-
-    # Find synonyms for each word
-    synonym_map = {}
-    for word in words:
-        synonyms = get_synonyms(word)
-        synonyms.add(word)  # Ensure the word itself is included if no synonyms are found
-        synonym_map[word] = list(synonyms)
-
-    return synonym_map
+    try:
+        # Find synonyms for each word
+        synonym_map = {}
+        for word in words:
+            synonyms = get_synonyms(word)
+            synonyms.add(word)  # Ensure the word itself is included if no synonyms are found
+            synonym_map[word] = list(synonyms)
+
+        return synonym_map
+    except Exception as e:
+        print(f"Error in process_sentence: {str(e)}")
+        return {word: [word] for word in words}
 
 def tf(dict1):
-    # print(dict1)
-    no_of_terms_in_document = len(dict1)
-    word_frequency = {}
-    for i in dict1:
-        count = 0
-        for j in dict1:
-            if i in dict1[j]:
-                count+=1
-        word_frequency[i] = count
-    # print(word_frequency)
-
-    for i in word_frequency:
-        word_frequency[i] = word_frequency[i]/no_of_terms_in_document
-
-    return word_frequency
+    try:
+        no_of_terms_in_document = len(dict1)
+        word_frequency = {}
+        for i in dict1:
+            count = 0
+            for j in dict1:
+                if i in dict1[j]:
+                    count+=1
+            word_frequency[i] = count
+
+        for i in word_frequency:
+            word_frequency[i] = word_frequency[i]/no_of_terms_in_document
+
+        return word_frequency
+    except Exception as e:
+        print(f"Error in tf: {str(e)}")
+        return {}
 
 def idf(di):
-    no_of_documents = len(di)
-    new_dict = {}
-    for d in range(len(di)):
-        for i in di[d]:
-            if i not in new_dict:
-                new_dict[i]=set()
-                new_dict[i].add(d)
-            else:
-                new_dict[i].add(d)
-
-    r = {}
-    for i in new_dict:
-        r[i]=len(new_dict[i])/no_of_documents
-    return r
+    try:
+        no_of_documents = len(di)
+        new_dict = {}
+        for d in range(len(di)):
+            for i in di[d]:
+                if i not in new_dict:
+                    new_dict[i]=set()
+                    new_dict[i].add(d)
+                else:
+                    new_dict[i].add(d)
+
+        r = {}
+        for i in new_dict:
+            r[i]=len(new_dict[i])/no_of_documents
+        return r
+    except Exception as e:
+        print(f"Error in idf: {str(e)}")
+        return {}
 
 def total_tf_idf_value(tf_idf_word_values,synonyms_words):
-    value = 0
-    for i in synonyms_words:
-        for j in synonyms_words[i]:
-            if j in tf_idf_word_values:
-                value += tf_idf_word_values[j]
-                break
-    return value
+    try:
+        value = 0
+        for i in synonyms_words:
+            for j in synonyms_words[i]:
+                if j in tf_idf_word_values:
+                    value += tf_idf_word_values[j]
+                    break
+        return value
+    except Exception as e:
+        print(f"Error in total_tf_idf_value: {str(e)}")
+        return 0
 
 def create_tfidf_values(correct_answer):
-    correct_answer_words = []
-    for i in correct_answer:
-        correct_answer_words.append(remove_stopwords(i))
-
-    correct_synonyms_words = []
-
-    for i in correct_answer_words:
-        correct_synonyms_words.append(process_sentence(i))
-
-    tf_ = []
-    for i in correct_synonyms_words:
-        tf_.append(tf(i))
-
-    idf_values = idf(correct_synonyms_words)
-
-    tf_idf_word_values = {}
-    count = 0
-    for correct_synonyms_word in correct_synonyms_words:
-        for i in correct_synonyms_word:
-            value = tf_[count][i]*idf_values[i]
-            if i in tf_idf_word_values:
-                tf_idf_word_values[i] = max(tf_idf_word_values[i],value)
-            else:
-                tf_idf_word_values[i] = value
-        count+=1
-    for i in tf_idf_word_values:
-        tf_idf_word_values[i] = round(tf_idf_word_values[i], 4)
-
-    tfidf_correct_ans = float('inf')
-    for i in correct_synonyms_words:
-        tfidf_correct_ans = min(total_tf_idf_value(tf_idf_word_values,i),tfidf_correct_ans)
-
-    return tf_idf_word_values,tfidf_correct_ans
+    try:
+        correct_answer_words = []
+        for i in correct_answer:
+            correct_answer_words.append(remove_stopwords(i))
+
+        correct_synonyms_words = []
+
+        for i in correct_answer_words:
+            correct_synonyms_words.append(process_sentence(i))
+
+        tf_ = []
+        for i in correct_synonyms_words:
+            tf_.append(tf(i))
+
+        idf_values = idf(correct_synonyms_words)
+
+        tf_idf_word_values = {}
+        count = 0
+        for correct_synonyms_word in correct_synonyms_words:
+            for i in correct_synonyms_word:
+                value = tf_[count][i]*idf_values[i]
+                if i in tf_idf_word_values:
+                    tf_idf_word_values[i] = max(tf_idf_word_values[i],value)
+                else:
+                    tf_idf_word_values[i] = value
+            count+=1
+        for i in tf_idf_word_values:
+            tf_idf_word_values[i] = round(tf_idf_word_values[i], 4)
+
+        tfidf_correct_ans = float('inf')
+        for i in correct_synonyms_words:
+            tfidf_correct_ans = min(total_tf_idf_value(tf_idf_word_values,i),tfidf_correct_ans)
+
+        return tf_idf_word_values,tfidf_correct_ans
+    except Exception as e:
+        print(f"Error in create_tfidf_values: {str(e)}")
+        return {}, 0.0
 
 def tfidf_answer_score(answer,tf_idf_word_values,max_tfidf,marks=10):
-    answer = remove_stopwords(answer)
-    answer_synonyms_words = process_sentence(answer)
-    value = total_tf_idf_value(tf_idf_word_values,answer_synonyms_words)
-    # print("tfidf value of answer: ",value, " , " "minimum tfidf value of correct answer: " ,max_tfidf)
-    score = (value/max_tfidf)*marks
-    # print(score)
-    if score>10:
-        return 10
-    else:
-        return score
+    try:
+        answer = remove_stopwords(answer)
+        answer_synonyms_words = process_sentence(answer)
+        value = total_tf_idf_value(tf_idf_word_values,answer_synonyms_words)
+        score = (value/max_tfidf)*marks if max_tfidf > 0 else 0
+        return min(10, max(0, score))  # Clamp between 0 and 10
+    except Exception as e:
+        print(f"Error in tfidf_answer_score: {str(e)}")
+        return 0
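The tf/idf implemented here is a synonym-aware variant: term frequency is computed over a sentence's synonym map, and idf runs over the set of correct answers, so it is not the textbook weighting. As an external sanity baseline, not used by the project, scikit-learn's standard implementation can be compared side by side:

    from sklearn.feature_extraction.text import TfidfVectorizer

    vectorizer = TfidfVectorizer(stop_words="english")
    matrix = vectorizer.fit_transform([
        "plants convert sunlight into chemical energy",
        "photosynthesis produces glucose and oxygen",
    ])
    print(dict(zip(vectorizer.get_feature_names_out(), matrix.toarray()[0].round(4))))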
 