yamanavijayavardhan committed
Commit 51c49bc · 1 Parent(s): 92ba605

Initial upload of answer grading application
Dockerfile ADDED
@@ -0,0 +1,10 @@
+ FROM python:3.9
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . /code
+
+ CMD ["python", "main.py"]
HTR/app.py ADDED
@@ -0,0 +1,23 @@
+ import cv2
+
+ from HTR.word import convert_image
+ from HTR.strike import struck_images
+ from HTR.hcr import text
+ from HTR.spell_and_gramer_check import spell_grammer
+
+ # Define a function to extract text from an image
+ def extract_text_from_image(img_path):
+     img = cv2.imread(img_path)
+     # print(img)
+     imgs = convert_image(img)
+     images_path = struck_images(imgs)
+     t = text(images_path)
+     # print("\n\n\n\n\n\n\n")
+     # print(t)
+     t = spell_grammer(t)
+     # t = text
+     # print("\n\n\n\n\n\n\n")
+     # print(t)
+     return t
+
+ # extract_text_from_image("ans_image/1.jpg")
HTR/hcr.py ADDED
@@ -0,0 +1,27 @@
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+ from PIL import Image
+ import cv2
+
+
+ MODEL_NAME = "microsoft/trocr-large-handwritten"
+ processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
+ model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
+
+
+ def text(image_cv):
+     t = ""
+     for i in image_cv:
+         img_rgb = cv2.cvtColor(i, cv2.COLOR_BGR2RGB)
+         image = Image.fromarray(img_rgb)
+
+         # image = Image.open(i).convert("RGB")
+         pixel_values = processor(image, return_tensors="pt").pixel_values
+         generated_ids = model.generate(pixel_values)
+
+         generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+         t = t+generated_text.replace(" ", "")+ " "
+
+     # print(t)
+
+     return t
+
HTR/spell_and_gramer_check.py ADDED
@@ -0,0 +1,39 @@
+ import language_tool_python
+ from spellchecker import SpellChecker
+
+ tool = language_tool_python.LanguageTool('en-US')
+
+ def check_grammar(answer):
+
+     my_matches = tool.check(answer)
+     corrected_text = tool.correct(answer)
+     return corrected_text
+
+ def correct_spelling(text):
+     spell = SpellChecker()
+     words = text.split()
+
+     # Find misspelled words
+     misspelled = spell.unknown(words)
+
+     # Correct misspelled words
+     corrected_text = []
+     for word in words:
+         if word in misspelled:
+             correction = spell.correction(word)
+             # If no correction found or correction is None, keep the original word
+             if correction is None:
+                 corrected_text.append(word)
+             else:
+                 corrected_text.append(correction)
+         else:
+             corrected_text.append(word)
+
+     return " ".join(map(str, corrected_text))
+
+ def spell_grammer(text):
+     spell_check_text = correct_spelling(text)
+
+     corrected_text = check_grammar(spell_check_text)
+
+     return corrected_text
HTR/strike.py ADDED
@@ -0,0 +1,45 @@
+ import pandas as pd
+ import numpy as np
+ import tensorflow as tf
+ import torch
+ import os
+ import cv2
+ from transformers import AutoModelForImageClassification
+
+ def image_preprocessing(image):
+     images=[]
+     for i in image:
+         binary_image = i
+         binary_image = cv2.resize(binary_image, (224, 224))
+         binary_image = cv2.merge([binary_image, binary_image, binary_image])
+         binary_image = binary_image/255
+         binary_image = torch.from_numpy(binary_image)
+         images.append(binary_image)
+     return images
+
+ def predict_image(image_path, model):
+     preprocessed_img = image_preprocessing(image_path)
+     images = torch.stack(preprocessed_img)
+     images = images.permute(0, 3, 1, 2)
+     predictions = model(images).logits.detach().numpy()
+     return predictions
+
+
+ model = AutoModelForImageClassification.from_pretrained("models/vit-base-beans")
+
+ def struck_images(word__image):
+
+
+     predictions = predict_image(word__image, model)
+
+     not_struck =[]
+     for i in range(len(predictions)):
+         if predictions[i].argmax().item() == 0:
+
+             not_struck.append(word__image[i])
+
+     # print(not_struck)
+     return not_struck
+
+
+ # struck_images()
HTR/word.py ADDED
@@ -0,0 +1,288 @@
+ import numpy as np
+ import cv2
+ import matplotlib.pyplot as plt
+ import sys
+ import os
+
+
+ cordinates =[]
+
+
+
+ def four_point_transform(image, pts):
+     rect = pts
+     (tl, tr, br, bl) = rect
+
+     # Compute the width of the new image
+     widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
+     widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
+     maxWidth = max(int(widthA), int(widthB))
+
+     # Compute the height of the new image
+     heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
+     heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
+     maxHeight = max(int(heightA), int(heightB))
+
+     dst = np.array([
+         [0, 0],
+         [maxWidth - 1, 0],
+         [maxWidth - 1, maxHeight - 1],
+         [0, maxHeight - 1]], dtype="float32")
+
+     rect = np.array(rect, dtype="float32")
+
+     M = cv2.getPerspectiveTransform(rect, dst)
+     warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))
+
+     return warped
+
+
+ def remove_shadow(image):
+     rgb_planes = cv2.split(image)
+
+     result_planes = []
+     result_norm_planes = []
+     for plane in rgb_planes:
+         dilated_img = cv2.dilate(plane, np.ones((7,7), np.uint8))
+         bg_img = cv2.medianBlur(dilated_img, 21)
+         diff_img = 255 - cv2.absdiff(plane, bg_img)
+         norm_img = cv2.normalize(diff_img,None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1)
+         result_planes.append(diff_img)
+         result_norm_planes.append(norm_img)
+
+     result = cv2.merge(result_planes)
+     result_norm = cv2.merge(result_norm_planes)
+
+     return result,result_norm
+
+
+
+
+ def analise(image):
+     global line, binary_image1, x_scaling , y_scaling
+     kernel = np.ones((1,250),np.uint8)
+
+     dilation = cv2.dilate(image, kernel, iterations = 2)
+
+     # cv2.namedWindow("Image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('Image',dilation)
+     # cv2.waitKey(0)
+
+     contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+     for i in reversed(contours):
+         x, y, w, h = cv2.boundingRect(i)
+         if cv2.contourArea(i)<20 :
+             continue
+         elif h < 8:
+             continue
+         else:
+             scaling_factor_in_y = 0.5
+             scaling_factor_in_x = 0
+             resized_contour = i.copy()
+
+             resized_contour = i * [x_scaling, y_scaling]
+
+             resized_contour = resized_contour.astype(int)
+             final_image__ = np.zeros_like(binary_image1)
+             cv2.drawContours(final_image__, [resized_contour], 0, (255), -1)
+
+             kernel_dil = np.ones((3,3),np.uint8)
+             final_image__ = cv2.dilate(final_image__,kernel_dil,iterations = 3)
+
+
+             line_image_final = cv2.bitwise_and(final_image__, binary_image1)
+             line.append(line_image_final)
+             # cv2.namedWindow("Line image", cv2.WINDOW_NORMAL)
+             # cv2.imshow('Line image',line_image_final)
+             # cv2.waitKey(0)
+
+
+
+ def image_resize_and_errosion(image):
+
+     height, width = image.shape[:2]
+     height = height + 1 * height
+     height = int(height)
+
+     resized_image = cv2.resize(image, (width, height))
+
+     kernel = np.ones((13,1),np.uint8)
+
+     erosion = cv2.erode(resized_image,kernel,iterations = 1)
+
+     return erosion
+
+
+ x_scaling = 0
+ y_scaling = 0
+ binary_image1 = 0
+ line = 0
+ line_length = 0
+ count = 0
+
+ def convert_image(img):
+     folder_path = 'images'
+
+     for filename in os.listdir(folder_path):
+         file_path = os.path.join(folder_path, filename)
+         try:
+             if os.path.isfile(file_path):
+                 os.remove(file_path)
+         except Exception as e:
+             print(f"Error deleting file {file_path}: {e}")
+
+
+
+     global x_scaling,y_scaling,binary_image1,line,line_length,count
+     # img = cv2.imread(image_file)
+     img_copy = np.copy(img)
+     line_length = 250
+     rect_image = img
+
+     # removing the shadow in the image
+     image1, image2_ = remove_shadow(rect_image)
+
+     # converting into grayscale
+     gray_ = cv2.cvtColor(image2_,cv2.COLOR_BGR2GRAY)
+
+     # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('grayscale image',gray_)
+     # cv2.waitKey(0)
+
+     # converting into binary image
+     _, binary_image_ = cv2.threshold(gray_, 200, 255, cv2.THRESH_BINARY)
+     # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('binary image',binary_image_)
+     # cv2.waitKey(0)
+
+     inverted_binary_image_ = 255 - binary_image_
+
+     binary_image1 = np.copy(inverted_binary_image_)
+
+     y_height ,x_width= rect_image.shape[:2]
+
+     # print("image width, height =", x_width, y_height)
+
+     # resizing the image
+     new_width = 500*5
+     new_height = 705*5
+
+     x_scaling = x_width/new_width
+     y_scaling = y_height/new_height
+
+     # print("After resizing width, height", new_width , new_height)
+     rect_image = cv2.resize(rect_image, (new_width, new_height), interpolation=cv2.INTER_NEAREST)
+     # cv2.namedWindow("resized image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('resized image',rect_image)
+     # cv2.waitKey(0)
+
+     # removing the shadow in the image
+     image1, image2 = remove_shadow(rect_image)
+
+     # converting into grayscale
+     gray = cv2.cvtColor(image2,cv2.COLOR_BGR2GRAY)
+     # cv2.namedWindow("grayscale image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('grayscale image',gray)
+     # cv2.waitKey(0)
+
+     # converting into binary image
+     _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+     _, binary_image = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+     # cv2.namedWindow("binary image", cv2.WINDOW_NORMAL)
+     # cv2.imshow('binary image',gray)
+     # cv2.waitKey(0)
+
+     # inverting the pixel
+     inverted_binary_image = 255 - binary_image
+
+     kernel = np.ones((2,2),np.uint8)
+
+
+     # performing erosion to remove noise
+     erosion = cv2.erode(inverted_binary_image,kernel,iterations = 1)
+     # cv2.namedWindow("erosion", cv2.WINDOW_NORMAL)
+     # cv2.imshow('erosion',erosion)
+     # cv2.waitKey(0)
+
+
+     # performing dilation operation
+     dilation = cv2.dilate(erosion,kernel,iterations = 1)
+     # cv2.namedWindow("dilation", cv2.WINDOW_NORMAL)
+     # cv2.imshow('dilation',erosion)
+     # cv2.waitKey(0)
+
+
+     new_image = np.copy(dilation)
+     new_image = 255 - new_image
+
+
+     # defining kernel size
+     kernel = np.ones((1,250),np.uint8)
+
+
+     # performing dilation operation
+     dilation_1 = cv2.dilate(dilation,kernel,iterations = 2)
+     # cv2.namedWindow("dilation_1", cv2.WINDOW_NORMAL)
+     # cv2.imshow('dilation_1',dilation_1)
+     # cv2.waitKey(0)
+
+     contours, _ = cv2.findContours(dilation_1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+     line = []
+     # line separation
+     for i in reversed(contours):
+         x, y, w, h = cv2.boundingRect(i)
+         if cv2.contourArea(i)<20:
+             continue
+         elif h < 10:
+             continue
+         else:
+             cv2.drawContours(new_image, [i],-1,(0),2)
+             final_image_ = np.zeros_like(binary_image)
+             cv2.drawContours(final_image_, [i], 0, (255), -1)
+
+             # cv2.namedWindow("final_image_", cv2.WINDOW_NORMAL)
+             # cv2.imshow('final_image_',final_image_)
+             # cv2.waitKey(0)
+
+
+             line_image = cv2.bitwise_and(final_image_, dilation)
+             # cv2.namedWindow("line_image", cv2.WINDOW_NORMAL)
+             # cv2.imshow('line_image',line_image)
+             # cv2.waitKey(0)
+
+
+             analise(line_image)
+
+
+     count = 0
+     kernel1 = np.ones((8,8),np.uint8)
+     word__image = [] # newly added
+     for line_image in line:
+
+         dilation_2 = cv2.dilate(line_image,kernel1,iterations = 2)
+
+         contours1, _ = cv2.findContours(dilation_2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+
+         sorted_contours = sorted(contours1, key=lambda c: cv2.boundingRect(c)[0])
+
+         for j in sorted_contours:
+             x1,y1,w1,h1 = cv2.boundingRect(j)
+             final_image = line_image[y1:y1+h1,x1:x1+w1]
+             image_name ="images/"+str(count)+".png"
+             final_image = 255 - final_image
+             word__image.append(final_image)# newly added
+             # cv2.imwrite(image_name, final_image)
+             count=count+1
+
+     # cv2.waitKey(0)
+     # cv2.destroyAllWindows()
+     return word__image
+
+
+
+
+ # img = cv2.imread("ans_image/1.jpg")
+ # convert_image(img)
+
all_models.py ADDED
@@ -0,0 +1,28 @@
+ from sentence_transformers import SentenceTransformer
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ class ModelSingleton:
+     _instance = None
+     _initialized = False
+
+     def __new__(cls):
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
+         return cls._instance
+
+     def __init__(self):
+         if not self._initialized:
+             # Sentence transformer model
+             SENTENCE_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+             self.similarity_tokenizer = AutoTokenizer.from_pretrained(SENTENCE_MODEL)
+             self.similarity_model = SentenceTransformer(SENTENCE_MODEL)
+
+             # Flan-T5-xl model only
+             FLAN_MODEL = "google/flan-t5-xl"
+             self.flan_tokenizer = AutoTokenizer.from_pretrained(FLAN_MODEL)
+             self.flan_model = AutoModelForSeq2SeqLM.from_pretrained(FLAN_MODEL)
+
+             self._initialized = True
+
+ # Create a global instance
+ models = ModelSingleton()
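
Every other module reaches these checkpoints through `from all_models import models`, so the MiniLM and Flan-T5 weights are loaded once per process and shared. A minimal usage sketch of the singleton behaviour (illustrative only, not part of the commit):

from all_models import models, ModelSingleton

# __new__ always hands back the first instance, so no second model load happens
assert ModelSingleton() is models
embedding = models.similarity_model.encode("sample sentence", convert_to_tensor=True)
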
correct_answer_generation/answer_generation.py ADDED
@@ -0,0 +1,46 @@
+ import sys
+ import os
+ import torch
+ sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+ from all_models import models
+
+
+ def query_(query, doc):
+     input_text = f"""
+     You are an AI assistant designed to extract relevant information from a document and generate a clear, concise answer.
+
+     Question: {query}
+
+     Provide a *single-paragraph response of 250 words* that summarizes key details, explains the answer logically, and avoids repetition. Ignore irrelevant details like page numbers, author names, and metadata.
+
+     Context:
+     "{doc}"
+
+     Answer:
+     """
+
+     # Move inputs to the same device as the model
+     device = next(models.flan_model.parameters()).device
+     inputs = models.flan_tokenizer(input_text, return_tensors="pt").to(device)
+     input_length = inputs["input_ids"].shape[1]
+     max_tokens = input_length + 180
+
+     with torch.no_grad():
+         outputs = models.flan_model.generate(
+             **inputs,
+             do_sample=True,
+             max_length=max_tokens,
+             min_length=100,
+             early_stopping=True,
+             temperature=0.7,
+             top_k=50,
+             top_p=0.9,
+             repetition_penalty=1.2,
+             num_beams=3
+         )
+
+     answer = models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # print(answer)
+     # answer = extract_answer(answer)
+     return answer
+
correct_answer_generation/answer_generation_database_creation.py ADDED
@@ -0,0 +1,22 @@
+ import os
+
+ from correct_answer_generation.create_database import create_database_main
+ from correct_answer_generation.related_content_creation import doc_creation
+ from correct_answer_generation.answer_generation import query_
+
+ def database_creation(path):
+     create_database_main(path)
+
+ def answer_generation(path,query):
+     # collection_name = os.path.splitext(os.path.basename(path))[0]
+     path = path.replace("/", "_")
+     data = doc_creation(query,path)
+     correct_answers = query_(query,data)
+     return correct_answers
+
+
+ # ans = answer_generation("OperatingSystems","What is the process, and how does it differ from a program?")
+ # # data = doc_creation(q,"OperatingSystems")
+
+
+
correct_answer_generation/create_database.py ADDED
@@ -0,0 +1,81 @@
+ import fitz  # PyMuPDF
+ import re
+ import chromadb
+ import sys
+ import os
+ import uuid
+ sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+ from all_models import models
+
+ def clean_text(text):
+     # Keep only letters, numbers, punctuation, whitespace, and newlines
+     cleaned_text = re.sub(r"[^a-zA-Z0-9\s.,!?;:'\"()\-]", "", text)
+     return cleaned_text
+
+ def extract_text_from_pdf(pdf_path):
+     text = ""
+     with fitz.open(pdf_path) as doc:
+         for page in doc:
+             page_text = page.get_text()
+             cleaned_text = clean_text(page_text)
+             text += cleaned_text
+     return text
+
+ def clean_data(text):
+     cleaned_text = re.sub(r'\n{2,}', '. \n', text)  # Replace multiple newlines with a single newline
+     cleaned_text = re.sub(r' {2,}', '. \n', cleaned_text)  # Replace multiple spaces with a newline
+     return cleaned_text.strip()  # Strip leading/trailing whitespace
+
+ def combine_list(strings):
+     combined_list = []
+     current_combined = ""
+     for string in strings:
+         word_count = len(string.split())
+
+         if len(current_combined.split()) < 20:
+             current_combined += " " + string.strip()  # Adding space before new string
+
+         # If the combined string reaches at least 20 words, add it to the final list
+         if len(current_combined.split()) >= 20:
+             combined_list.append(current_combined)  # Strip to remove leading/trailing whitespace
+             current_combined = ""  # Reset for the next round
+     if current_combined:
+         combined_list.append(current_combined.strip())
+     return combined_list
+
+ def create_databse(data, name):
+     # Initialize the Persistent Client
+     client = chromadb.PersistentClient(path="correct_answer_generation/chroma_db")
+
+     collection_names = client.list_collections()
+     if name in collection_names:
+         client.delete_collection(name)  # Delete the old collection
+
+     # Create a Collection
+     collection = client.create_collection(name)
+
+     # Generate embeddings using the singleton model
+     embeddings = models.similarity_model.encode(data, batch_size=32, convert_to_tensor=True)
+
+     # Create documents and add them to the collection
+     unique_id = [str(uuid.uuid4()) for _ in range(len(embeddings))]
+
+     collection.add(
+         documents=data,
+         ids=unique_id
+     )
+
+ def create_database_main(path):
+     pdf_path = path
+     pdf_text = extract_text_from_pdf(pdf_path)
+     data = clean_data(pdf_text)
+     data = data.split('. \n')
+     for i in range(len(data)):
+         data[i] = re.sub(r' \n', ' ', data[i])
+         data[i] = re.sub(r'\s+', ' ', data[i])
+     data = [text for text in data if len(text) >= 2]
+     data = combine_list(data)
+
+     path = path.replace("/", "_")
+     create_databse(data, path)
+
correct_answer_generation/related_content_creation.py ADDED
@@ -0,0 +1,29 @@
+ import chromadb
+
+
+ def doc_creation(q,collection_name):
+
+     client_ = chromadb.PersistentClient(path="correct_answer_generation/chroma_db")
+
+     # collection = client_.get_collection(name='OperatingSystems')
+     collection = client_.get_collection(name=collection_name)
+
+
+     results = collection.query(
+         query_texts=[q],
+         n_results=7,  # how many results to return
+         include = ['documents']  # newly added
+     )
+
+     data = ""
+     for i in results['documents'][0]:
+         data += " "+i
+         # print(i)
+     return data
+
+ # q = "What is the difference between a process and a program?"
+ # data = doc_creation(q,"OperatingSystems")
+
+
+
+
main.py ADDED
@@ -0,0 +1,217 @@
+ from flask import Flask, request, jsonify, render_template
+ import os
+ import json
+ import torch
+ from werkzeug.utils import secure_filename
+
+ from HTR.app import extract_text_from_image
+
+ from correct_answer_generation.answer_generation_database_creation import database_creation, answer_generation
+
+
+ from similarity_check.tf_idf.tf_idf_score import create_tfidf_values, tfidf_answer_score
+ from similarity_check.semantic_meaning_check.semantic import similarity_model_score, fasttext_similarity,question_vector_sentence,question_vector_word
+ from similarity_check.llm_based_scoring.llm import llm_score
+
+ app = Flask(__name__)
+
+ UPLOAD_FOLDER = 'uploads'
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+
+ @app.route('/')
+ def index():
+     return render_template('index.html')
+
+
+
+ def new_value(value, old_min, old_max, new_min, new_max):
+     new_value = new_min + ((value - old_min) * (new_max - new_min)) / (old_max - old_min)
+     return new_value
+
+ @app.route('/compute_answers', methods=['POST'])
+ def compute_answers():
+     query_file = request.files.get('query_file')
+     if not query_file:
+         return jsonify({"error": "Missing query file"}), 400
+
+     queries = query_file.read().decode('utf-8').splitlines()
+     # print(queries)
+     file_type = request.form.get('file_type')
+     ans_csv_file = request.files.get('ans_csv_file')
+
+     if file_type == "csv":
+         ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
+         c_answers = []
+         # print(ans_csv_file)
+         for i in ans_csv_file:
+             c_answers.append(i.split('\\n'))
+         # print(c_answers)
+         return jsonify({"answers": c_answers}), 200
+
+
+     try:
+         c_answers = []
+
+         if file_type == "csv":
+             # Process answer CSV file
+             answers = ans_csv_file.read().decode('utf-8').splitlines()
+             # print(answers)
+             # Implement CSV processing logic
+             c_answers = [f"Processed query: {query}" for query in queries]
+
+         elif file_type == 'pdf':
+
+             for query in queries:
+                 folder_path = 'Knowledge_Retriever_pdf'
+
+                 pdf_files = [f"{folder_path}/{file}" for file in os.listdir(folder_path) if file.endswith('.pdf')]
+
+             for i in pdf_files:
+                 database_creation(i)
+
+             for i in queries:
+                 ans = []
+                 for j in pdf_files:
+                     ans.append(answer_generation(j,i))
+                 c_answers.append(ans)
+
+         else:
+             return jsonify({"error": "Unsupported file type"}), 400
+         # print(c_answers)
+         return jsonify({"answers": c_answers}), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+ @app.route('/compute_marks', methods=['POST'])
+ def compute_marks():
+     try:
+         a = request.form.get('answers')
+         a = json.loads(a)
+         answers = []
+         for i in a:
+             ans = i.split('\n\n')
+             answers.append(ans)
+
+         # Create temporary directory for uploaded files
+         temp_folder = os.path.join('uploads', 'temp_answers')
+         os.makedirs(temp_folder, exist_ok=True)
+
+         # Process uploaded files
+         files = request.files.getlist('files[]')
+         data = {}
+
+         for file in files:
+             if file.filename.endswith(('.jpg', '.jpeg', '.png')):
+                 # Get the relative path from the uploaded folder structure
+                 relative_path = file.filename
+
+                 # Extract student folder name (first directory in path)
+                 path_parts = relative_path.split('/')
+                 if len(path_parts) >= 2:
+                     student_folder = path_parts[0]
+
+                     # Create student directory if it doesn't exist
+                     student_path = os.path.join(temp_folder, student_folder)
+                     os.makedirs(student_path, exist_ok=True)
+
+                     # Save the file
+                     save_path = os.path.join(temp_folder, relative_path)
+                     os.makedirs(os.path.dirname(save_path), exist_ok=True)
+                     file.save(save_path)
+
+                     # Store file path in data dictionary
+                     if student_folder in data:
+                         data[student_folder].append(save_path)
+                     else:
+                         data[student_folder] = [save_path]
+
+         # Sort files for each student
+         for student in data:
+             data[student].sort()  # This will sort the file paths alphabetically
+
+         # Rest of your existing marking logic
+         s_marks = {}
+         sen_vec_answers = []
+         word_vec_answers = []
+
+         for i in answers:
+             temp_v = []
+             temp_w = []
+             for j in i:
+                 temp_v.append(question_vector_sentence(j))
+                 temp_w.append(question_vector_word(j))
+             sen_vec_answers.append(temp_v)
+             word_vec_answers.append(temp_w)
+
+         for i in data:
+             s_marks[i] = []
+             count = 0
+             for j in data[i]:
+                 image_path = j
+                 s_answer = extract_text_from_image(image_path)
+                 tf_idf_word_values, max_tfidf = create_tfidf_values(answers[count])
+                 m = marks(s_answer, sen_vec_answers[count], word_vec_answers[count],
+                           tf_idf_word_values, max_tfidf, answers[count])
+                 if isinstance(m, torch.Tensor):
+                     m = m.item()
+                 s_marks[i].append(m)
+                 count += 1
+
+         # Cleanup temporary files
+         import shutil
+         shutil.rmtree(temp_folder)
+
+         return jsonify({"message": s_marks}), 200
+
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+
+
+ def marks(answer,sen_vec_answers,word_vec_answers,tf_idf_word_values, max_tfidf,correct_answers):
+     marks = 0
+     marks1 = tfidf_answer_score(answer,tf_idf_word_values,max_tfidf,marks =10)
+
+     if marks1>3:
+         marks += new_value(marks1, old_min = 3, old_max=10, new_min=0, new_max=5)
+     # print("TFIDF Score",float(marks))
+
+     if marks1>2:
+         marks2 = similarity_model_score(sen_vec_answers,answer)
+         a = 0
+         if marks2>0.95:
+             marks += 3
+             a = a+3
+         elif marks2>0.5:
+             marks += new_value(marks2, old_min = 0.5, old_max=0.95, new_min=0, new_max=3)
+             a = a+new_value(marks2, old_min = 0.5, old_max=0.95, new_min=0, new_max=3)
+         # print("sentence-transformers/all-MiniLM-L6-v2 with Cosine Similarity",a)
+
+         marks3 = fasttext_similarity(word_vec_answers,answer)
+         b = 0
+         if marks3>0.9:
+             marks += 2
+             b= b+2
+         elif marks3>0.4:
+             marks += new_value(marks3, old_min = 0.4, old_max=0.9, new_min=0, new_max=2)
+             b=b+new_value(marks3, old_min = 0.4, old_max=0.9, new_min=0, new_max=2)
+         # print("fasttext-wiki-news-subwords-300 with Soft Cosine Similarity",b)
+
+         marks4 = llm_score(correct_answers,answer)
+         for i in range(len(marks4)):
+             marks4[i] = float(marks4[i])
+
+         m = max(marks4)
+         # print("llm score",m/2)
+         marks = marks/2 + m/2
+
+     return marks
+
+
+
+
+
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=7860)
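
The `marks` routine combines the individual similarity scores through `new_value`, which is just a linear rescale from one interval onto another. A small worked sketch of the TF-IDF contribution (the input value 7 is made up for illustration):

# new_value maps `value` from [old_min, old_max] linearly onto [new_min, new_max].
# A TF-IDF score of 7 on the 3-10 band therefore contributes
# 0 + ((7 - 3) * (5 - 0)) / (10 - 3) ≈ 2.86 of the available 5 marks.
def new_value(value, old_min, old_max, new_min, new_max):
    return new_min + ((value - old_min) * (new_max - new_min)) / (old_max - old_min)

print(round(new_value(7, 3, 10, 0, 5), 2))  # 2.86
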
models/vit-base-beans/all_results.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "epoch": 2.2535211267605635,
+     "eval_loss": 0.011280356906354427,
+     "eval_precision": 1.0,
+     "eval_runtime": 120.2175,
+     "eval_samples_per_second": 2.096,
+     "eval_steps_per_second": 0.133,
+     "total_flos": 1.9806952545489715e+17,
+     "train_loss": 0.1075204591266811,
+     "train_runtime": 5558.7629,
+     "train_samples_per_second": 4.08,
+     "train_steps_per_second": 0.255
+ }
models/vit-base-beans/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "_name_or_path": "vitpre",
+   "architectures": [
+     "ViTForImageClassification"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "encoder_stride": 16,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 768,
+   "image_size": 224,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "model_type": "vit",
+   "num_attention_heads": 12,
+   "num_channels": 3,
+   "num_hidden_layers": 12,
+   "patch_size": 16,
+   "problem_type": "single_label_classification",
+   "qkv_bias": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.41.2"
+ }
models/vit-base-beans/eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 2.2535211267605635,
+     "eval_loss": 0.011280356906354427,
+     "eval_precision": 1.0,
+     "eval_runtime": 120.2175,
+     "eval_samples_per_second": 2.096,
+     "eval_steps_per_second": 0.133
+ }
models/vit-base-beans/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b75581f71443ac2edbb7a6b087a3bb52dd5da0df124ba42b2da04a4c257466f
+ size 343223968
models/vit-base-beans/runs/Jun06_19-03-39_DELL/events.out.tfevents.1717715031.DELL.3528.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e3365ccc0e41eb48d34859835f9df2f0ffe01b2e26801fc353474dd62e9b396b
+ size 13489
models/vit-base-beans/runs/Jun06_19-03-39_DELL/events.out.tfevents.1717720711.DELL.3528.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:677f7fa63a4d50f27d37f5ab5aaf576d62ce8317a5e21a2889dc291b79b3e1c4
+ size 412
models/vit-base-beans/train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 2.2535211267605635,
+     "total_flos": 1.9806952545489715e+17,
+     "train_loss": 0.1075204591266811,
+     "train_runtime": 5558.7629,
+     "train_samples_per_second": 4.08,
+     "train_steps_per_second": 0.255
+ }
models/vit-base-beans/trainer_state.json ADDED
@@ -0,0 +1,307 @@
1
+ {
2
+ "best_metric": 0.011280356906354427,
3
+ "best_model_checkpoint": "./vit-base-beans\\checkpoint-130",
4
+ "epoch": 2.2535211267605635,
5
+ "eval_steps": 10,
6
+ "global_step": 160,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.14084507042253522,
13
+ "grad_norm": 3.220585584640503,
14
+ "learning_rate": 0.00019859154929577466,
15
+ "loss": 0.4675,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.14084507042253522,
20
+ "eval_loss": 0.14601898193359375,
21
+ "eval_precision": 0.9767441860465116,
22
+ "eval_runtime": 115.8763,
23
+ "eval_samples_per_second": 2.175,
24
+ "eval_steps_per_second": 0.138,
25
+ "step": 10
26
+ },
27
+ {
28
+ "epoch": 0.28169014084507044,
29
+ "grad_norm": 0.3653267025947571,
30
+ "learning_rate": 0.0001971830985915493,
31
+ "loss": 0.2185,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.28169014084507044,
36
+ "eval_loss": 0.18865476548671722,
37
+ "eval_precision": 0.9264705882352942,
38
+ "eval_runtime": 119.7761,
39
+ "eval_samples_per_second": 2.104,
40
+ "eval_steps_per_second": 0.134,
41
+ "step": 20
42
+ },
43
+ {
44
+ "epoch": 0.4225352112676056,
45
+ "grad_norm": 0.9396668672561646,
46
+ "learning_rate": 0.00019577464788732396,
47
+ "loss": 0.1316,
48
+ "step": 30
49
+ },
50
+ {
51
+ "epoch": 0.4225352112676056,
52
+ "eval_loss": 0.061195846647024155,
53
+ "eval_precision": 1.0,
54
+ "eval_runtime": 119.7047,
55
+ "eval_samples_per_second": 2.105,
56
+ "eval_steps_per_second": 0.134,
57
+ "step": 30
58
+ },
59
+ {
60
+ "epoch": 0.5633802816901409,
61
+ "grad_norm": 1.195994257926941,
62
+ "learning_rate": 0.00019436619718309861,
63
+ "loss": 0.081,
64
+ "step": 40
65
+ },
66
+ {
67
+ "epoch": 0.5633802816901409,
68
+ "eval_loss": 0.26516667008399963,
69
+ "eval_precision": 1.0,
70
+ "eval_runtime": 119.2044,
71
+ "eval_samples_per_second": 2.114,
72
+ "eval_steps_per_second": 0.134,
73
+ "step": 40
74
+ },
75
+ {
76
+ "epoch": 0.704225352112676,
77
+ "grad_norm": 13.78415584564209,
78
+ "learning_rate": 0.00019295774647887326,
79
+ "loss": 0.0995,
80
+ "step": 50
81
+ },
82
+ {
83
+ "epoch": 0.704225352112676,
84
+ "eval_loss": 0.14177252352237701,
85
+ "eval_precision": 0.968,
86
+ "eval_runtime": 120.7206,
87
+ "eval_samples_per_second": 2.087,
88
+ "eval_steps_per_second": 0.133,
89
+ "step": 50
90
+ },
91
+ {
92
+ "epoch": 0.8450704225352113,
93
+ "grad_norm": 5.696216106414795,
94
+ "learning_rate": 0.0001915492957746479,
95
+ "loss": 0.2069,
96
+ "step": 60
97
+ },
98
+ {
99
+ "epoch": 0.8450704225352113,
100
+ "eval_loss": 0.03753811493515968,
101
+ "eval_precision": 0.984375,
102
+ "eval_runtime": 122.9296,
103
+ "eval_samples_per_second": 2.05,
104
+ "eval_steps_per_second": 0.13,
105
+ "step": 60
106
+ },
107
+ {
108
+ "epoch": 0.9859154929577465,
109
+ "grad_norm": 3.0282273292541504,
110
+ "learning_rate": 0.00019014084507042254,
111
+ "loss": 0.0379,
112
+ "step": 70
113
+ },
114
+ {
115
+ "epoch": 0.9859154929577465,
116
+ "eval_loss": 0.07843092083930969,
117
+ "eval_precision": 0.9541984732824428,
118
+ "eval_runtime": 124.2593,
119
+ "eval_samples_per_second": 2.028,
120
+ "eval_steps_per_second": 0.129,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 1.1267605633802817,
125
+ "grad_norm": 5.544398784637451,
126
+ "learning_rate": 0.0001887323943661972,
127
+ "loss": 0.1098,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 1.1267605633802817,
132
+ "eval_loss": 0.11345015466213226,
133
+ "eval_precision": 0.9264705882352942,
134
+ "eval_runtime": 122.7101,
135
+ "eval_samples_per_second": 2.054,
136
+ "eval_steps_per_second": 0.13,
137
+ "step": 80
138
+ },
139
+ {
140
+ "epoch": 1.267605633802817,
141
+ "grad_norm": 2.5479578971862793,
142
+ "learning_rate": 0.00018732394366197184,
143
+ "loss": 0.0958,
144
+ "step": 90
145
+ },
146
+ {
147
+ "epoch": 1.267605633802817,
148
+ "eval_loss": 0.12344790995121002,
149
+ "eval_precision": 0.9130434782608695,
150
+ "eval_runtime": 177.1319,
151
+ "eval_samples_per_second": 1.423,
152
+ "eval_steps_per_second": 0.09,
153
+ "step": 90
154
+ },
155
+ {
156
+ "epoch": 1.408450704225352,
157
+ "grad_norm": 0.5559585690498352,
158
+ "learning_rate": 0.0001859154929577465,
159
+ "loss": 0.0762,
160
+ "step": 100
161
+ },
162
+ {
163
+ "epoch": 1.408450704225352,
164
+ "eval_loss": 0.07210251688957214,
165
+ "eval_precision": 1.0,
166
+ "eval_runtime": 124.2559,
167
+ "eval_samples_per_second": 2.028,
168
+ "eval_steps_per_second": 0.129,
169
+ "step": 100
170
+ },
171
+ {
172
+ "epoch": 1.5492957746478875,
173
+ "grad_norm": 0.07416976243257523,
174
+ "learning_rate": 0.00018450704225352114,
175
+ "loss": 0.0088,
176
+ "step": 110
177
+ },
178
+ {
179
+ "epoch": 1.5492957746478875,
180
+ "eval_loss": 0.01765807531774044,
181
+ "eval_precision": 1.0,
182
+ "eval_runtime": 214.9118,
183
+ "eval_samples_per_second": 1.173,
184
+ "eval_steps_per_second": 0.074,
185
+ "step": 110
186
+ },
187
+ {
188
+ "epoch": 1.6901408450704225,
189
+ "grad_norm": 0.0715404525399208,
190
+ "learning_rate": 0.0001830985915492958,
191
+ "loss": 0.0085,
192
+ "step": 120
193
+ },
194
+ {
195
+ "epoch": 1.6901408450704225,
196
+ "eval_loss": 0.14363093674182892,
197
+ "eval_precision": 0.9333333333333333,
198
+ "eval_runtime": 121.1058,
199
+ "eval_samples_per_second": 2.081,
200
+ "eval_steps_per_second": 0.132,
201
+ "step": 120
202
+ },
203
+ {
204
+ "epoch": 1.8309859154929577,
205
+ "grad_norm": 0.03990806266665459,
206
+ "learning_rate": 0.00018169014084507045,
207
+ "loss": 0.0071,
208
+ "step": 130
209
+ },
210
+ {
211
+ "epoch": 1.8309859154929577,
212
+ "eval_loss": 0.011280356906354427,
213
+ "eval_precision": 1.0,
214
+ "eval_runtime": 120.5316,
215
+ "eval_samples_per_second": 2.091,
216
+ "eval_steps_per_second": 0.133,
217
+ "step": 130
218
+ },
219
+ {
220
+ "epoch": 1.971830985915493,
221
+ "grad_norm": 0.03454509377479553,
222
+ "learning_rate": 0.00018028169014084507,
223
+ "loss": 0.0155,
224
+ "step": 140
225
+ },
226
+ {
227
+ "epoch": 1.971830985915493,
228
+ "eval_loss": 0.10382802784442902,
229
+ "eval_precision": 0.9545454545454546,
230
+ "eval_runtime": 121.0415,
231
+ "eval_samples_per_second": 2.082,
232
+ "eval_steps_per_second": 0.132,
233
+ "step": 140
234
+ },
235
+ {
236
+ "epoch": 2.112676056338028,
237
+ "grad_norm": 0.03133228421211243,
238
+ "learning_rate": 0.00017887323943661972,
239
+ "loss": 0.069,
240
+ "step": 150
241
+ },
242
+ {
243
+ "epoch": 2.112676056338028,
244
+ "eval_loss": 0.05120203271508217,
245
+ "eval_precision": 1.0,
246
+ "eval_runtime": 121.432,
247
+ "eval_samples_per_second": 2.075,
248
+ "eval_steps_per_second": 0.132,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 2.2535211267605635,
253
+ "grad_norm": 0.033982835710048676,
254
+ "learning_rate": 0.00017746478873239437,
255
+ "loss": 0.0866,
256
+ "step": 160
257
+ },
258
+ {
259
+ "epoch": 2.2535211267605635,
260
+ "eval_loss": 0.0187800545245409,
261
+ "eval_precision": 0.9921259842519685,
262
+ "eval_runtime": 123.5401,
263
+ "eval_samples_per_second": 2.04,
264
+ "eval_steps_per_second": 0.13,
265
+ "step": 160
266
+ },
267
+ {
268
+ "epoch": 2.2535211267605635,
269
+ "step": 160,
270
+ "total_flos": 1.9806952545489715e+17,
271
+ "train_loss": 0.1075204591266811,
272
+ "train_runtime": 5558.7629,
273
+ "train_samples_per_second": 4.08,
274
+ "train_steps_per_second": 0.255
275
+ }
276
+ ],
277
+ "logging_steps": 10,
278
+ "max_steps": 1420,
279
+ "num_input_tokens_seen": 0,
280
+ "num_train_epochs": 20,
281
+ "save_steps": 10,
282
+ "stateful_callbacks": {
283
+ "EarlyStoppingCallback": {
284
+ "args": {
285
+ "early_stopping_patience": 5,
286
+ "early_stopping_threshold": 0.01
287
+ },
288
+ "attributes": {
289
+ "early_stopping_patience_counter": 0
290
+ }
291
+ },
292
+ "TrainerControl": {
293
+ "args": {
294
+ "should_epoch_stop": false,
295
+ "should_evaluate": false,
296
+ "should_log": false,
297
+ "should_save": true,
298
+ "should_training_stop": true
299
+ },
300
+ "attributes": {}
301
+ }
302
+ },
303
+ "total_flos": 1.9806952545489715e+17,
304
+ "train_batch_size": 16,
305
+ "trial_name": null,
306
+ "trial_params": null
307
+ }
models/vit-base-beans/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e900ee99fbfc723afe73ab030c236d4f1d59bb636d495f08883046cfa74531c4
+ size 5048
similarity_check/llm_based_scoring/llm.py ADDED
@@ -0,0 +1,76 @@
+ import torch
+ import sys
+ import os
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+ from all_models import models
+
+ # Remove these lines since we're using the singleton
+ # MODEL_NAME = "google/flan-t5-xl"
+ # model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+ # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ models.flan_model.to(device)
+
+ def llm_score(correct_answers, answer):
+     score = []
+
+     for correct_answer in correct_answers:
+         print(correct_answer)
+         print(answer)
+         print()
+         print()
+         prompt = (
+             "You are an expert evaluator of answers. Your response must be a *single numeric score (0-10), not a range.*\n\n"
+
+             "The user's answer has been converted from handwriting using OCR, so minor spelling, punctuation, or small word variations may exist. "
+             "Focus on meaning rather than transcription errors.\n\n"
+
+             "### Evaluation Criteria:\n"
+             "- *Correctness (90% weight):* Does the answer accurately convey the meaning of the correct answer?\n"
+             "- *Completeness (10% weight):* Does it cover all key points?\n\n"
+
+             "### Handling OCR Errors:\n"
+             "- Ignore minor spelling/punctuation mistakes that don't affect meaning.\n"
+             "- Penalize only if word substitutions change the meaning.\n\n"
+
+             "### Scoring Guidelines:\n"
+             "- *10:* Fully correct and complete (90-100% accurate).\n"
+             "- *From 9 to 8:* Mostly correct, minor missing details (80-90% accurate).\n"
+             "- *From 7 to 6:* Good but missing some key points (60-80% accurate).\n"
+             "- *From 5 to 4:* Average, with several omissions/errors (40-60% accurate).\n"
+             "- *From 3 to 2:* Poor, major meaning errors (20-40% accurate).\n"
+             "- *From 1 to 0:* Incorrect or irrelevant (less than 20% accurate).\n\n"
+
+             "Compare the answers and assign a *single numeric score (0-10)* based on correctness and completeness.\n\n"
+
+             "Correct answer:\n"
+             f"{correct_answer}\n\n"
+             "User's answer:\n"
+             f"{answer}\n\n"
+             "Final Score (numeric only, strictly between 0 and 10):")
+
+         # Tokenize input prompt
+         inputs = models.flan_tokenizer(prompt, return_tensors="pt").to(device)
+
+         # Generate response
+         with torch.no_grad():
+             outputs = models.flan_model.generate(
+                 **inputs,
+                 max_length=2048,
+                 do_sample=True,
+                 num_return_sequences=1,
+                 num_beams=5,
+                 temperature=0.6,
+                 top_p=0.9,
+                 early_stopping=True,
+                 pad_token_id=models.flan_tokenizer.pad_token_id,
+                 eos_token_id=models.flan_tokenizer.eos_token_id,
+                 bos_token_id=models.flan_tokenizer.bos_token_id,
+             )
+
+         # Decode and print response
+         print(models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True))
+         score.append(models.flan_tokenizer.decode(outputs[0], skip_special_tokens=True))
+
+     return score
similarity_check/semantic_meaning_check/semantic.py ADDED
@@ -0,0 +1,82 @@
+ from sentence_transformers import util
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ from gensim.models import KeyedVectors
+ import numpy as np
+ import nltk
+ from gensim import corpora
+ from gensim.models import FastText
+ from gensim.similarities import SparseTermSimilarityMatrix, WordEmbeddingSimilarityIndex
+ from gensim.downloader import load
+ import sys
+ import os
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+ from all_models import models
+
+ # Keep fasttext as is
+ fasttext = load('fasttext-wiki-news-subwords-300')
+
+ # nltk.download('punkt')
+ # nltk.download('stopwords')
+
+ def question_vector_sentence(correct_answer):
+     return models.similarity_model.encode(correct_answer, convert_to_tensor=True)
+
+ def similarity_model_score(correct_answer_vector, answer):
+     answer_embedding = models.similarity_model.encode(answer, convert_to_tensor=True)
+     cosine_score = float('-inf')
+     for i in correct_answer_vector:
+         cosine_score = max(cosine_score, util.pytorch_cos_sim(i, answer_embedding))
+     return cosine_score
+
+ def preprocess(sentence):
+     # Lowercase and remove punctuation
+     sentence = sentence.lower()
+     # Tokenize
+     words = word_tokenize(sentence)
+     # Remove stop words
+     words = [word for word in words if word not in stopwords.words('english')]
+     return words
+
+ def sentence_to_vec(tokens, model):
+     # Filter words that are in the Word2Vec vocabulary
+     valid_words = [word for word in tokens if word in model]
+
+     # If there are no valid words, return a zero vector
+     if not valid_words:
+         return np.zeros(model.vector_size)
+
+     # Compute the average vector
+     word_vectors = [model[word] for word in valid_words]
+     sentence_vector = np.mean(word_vectors, axis=0)
+
+     return sentence_vector
+
+ def compute_scm(tokens1, tokens2, model):
+     dictionary = corpora.Dictionary([tokens1, tokens2])
+     tokens1 = dictionary.doc2bow(tokens1)
+     tokens2 = dictionary.doc2bow(tokens2)
+     termsim_index = WordEmbeddingSimilarityIndex(model)
+     termsim_matrix = SparseTermSimilarityMatrix(termsim_index, dictionary)
+     similarity = termsim_matrix.inner_product(tokens1, tokens2, normalized=(True, True))
+     return similarity
+
+ def question_vector_word(correct_answer):
+     return preprocess(correct_answer)
+
+ def fasttext_similarity(correct_answer_vector, answer):
+     preprocess_answer = preprocess(answer)
+     soft_cosine = float('-inf')
+
+     for i in correct_answer_vector:
+         soft_cosine = max(compute_scm(i, preprocess_answer, fasttext), soft_cosine)
+
+     return soft_cosine
+
+
+
+
+
+
+
+
similarity_check/tf_idf/tf_idf_score.py ADDED
@@ -0,0 +1,142 @@
+ import nltk
+ from nltk.corpus import stopwords
+ from nltk.tokenize import word_tokenize
+ from nltk.corpus import wordnet
+ from collections import Counter
+ import string
+
+ # Check and download required NLTK packages
+ try:
+     stopwords.words('english')
+ except LookupError:
+     print("Downloading required NLTK data...")
+     nltk.download('stopwords')
+     nltk.download('punkt')
+     nltk.download('wordnet')
+
+ def remove_stopwords(sentence):
+
+     # converting into words
+     words = word_tokenize(sentence)
+
+     # Get the set of English stop words
+     stop_words = set(stopwords.words('english'))
+
+     # Remove stop words from the list of words
+     filtered_words = [word for word in words if word.lower() not in stop_words]
+
+     words = [word.lower() for word in filtered_words if word.isalpha() and len(word)>1]
+
+     return words
+
+ def get_synonyms(word):
+     synonyms = set()
+     for syn in wordnet.synsets(word):
+         for lemma in syn.lemmas():
+             synonyms.add(lemma.name().lower())
+     return synonyms
+
+
+ def process_sentence(words):
+
+     # Find synonyms for each word
+     synonym_map = {}
+     for word in words:
+         synonyms = get_synonyms(word)
+         synonyms.add(word)  # Ensure the word itself is included if no synonyms are found
+         synonym_map[word] = list(synonyms)
+
+     return synonym_map
+
+ def tf(dict1):
+     # print(dict1)
+     no_of_terms_in_document = len(dict1)
+     word_frequency = {}
+     for i in dict1:
+         count = 0
+         for j in dict1:
+             if i in dict1[j]:
+                 count+=1
+         word_frequency[i] = count
+     # print(word_frequency)
+
+     for i in word_frequency:
+         word_frequency[i] = word_frequency[i]/no_of_terms_in_document
+
+     return word_frequency
+
+ def idf(di):
+     no_of_documents = len(di)
+     new_dict = {}
+     for d in range(len(di)):
+         for i in di[d]:
+             if i not in new_dict:
+                 new_dict[i]=set()
+                 new_dict[i].add(d)
+             else:
+                 new_dict[i].add(d)
+
+     r = {}
+     for i in new_dict:
+         r[i]=len(new_dict[i])/no_of_documents
+     return r
+
+ def total_tf_idf_value(tf_idf_word_values,synonyms_words):
+     value = 0
+     for i in synonyms_words:
+         for j in synonyms_words[i]:
+             if j in tf_idf_word_values:
+                 value += tf_idf_word_values[j]
+                 break
+     return value
+
+
+ def create_tfidf_values(correct_answer):
+     correct_answer_words = []
+     for i in correct_answer:
+         correct_answer_words.append(remove_stopwords(i))
+
+     correct_synonyms_words = []
+
+     for i in correct_answer_words:
+         correct_synonyms_words.append(process_sentence(i))
+
+     tf_ = []
+     for i in correct_synonyms_words:
+         tf_.append(tf(i))
+
+
+     idf_values = idf(correct_synonyms_words)
+
+     tf_idf_word_values = {}
+     count = 0
+     for correct_synonyms_word in correct_synonyms_words:
+         for i in correct_synonyms_word:
+             value = tf_[count][i]*idf_values[i]
+             if i in tf_idf_word_values:
+                 tf_idf_word_values[i] = max(tf_idf_word_values[i],value)
+             else:
+                 tf_idf_word_values[i] = value
+         count+=1
+     for i in tf_idf_word_values:
+         tf_idf_word_values[i] = round(tf_idf_word_values[i], 4)
+
+     tfidf_correct_ans = float('inf')
+     for i in correct_synonyms_words:
+         tfidf_correct_ans = min(total_tf_idf_value(tf_idf_word_values,i),tfidf_correct_ans)
+
+     return tf_idf_word_values,tfidf_correct_ans
+
+
+ def tfidf_answer_score(answer,tf_idf_word_values,max_tfidf,marks=10):
+     answer = remove_stopwords(answer)
+     answer_synonyms_words = process_sentence(answer)
+     value = total_tf_idf_value(tf_idf_word_values,answer_synonyms_words)
+     # print("tfidf value of answer: ",value, " , " "minimum tfidf value of correct answer: " ,max_tfidf)
+     score = (value/max_tfidf)*marks
+     # print(score)
+     if score>10:
+         return 10
+     else:
+         return score
+
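
Here `create_tfidf_values` builds synonym-expanded TF-IDF weights from the reference answers, and `tfidf_answer_score` rates a student answer against them on a 0-10 scale (capped at 10). A small usage sketch with made-up reference strings (requires the NLTK data downloaded above):

from similarity_check.tf_idf.tf_idf_score import create_tfidf_values, tfidf_answer_score

# Hypothetical reference answers for one question
reference_answers = [
    "A process is a program in execution with its own address space.",
    "A program is a passive set of instructions stored on disk.",
]

weights, min_reference_tfidf = create_tfidf_values(reference_answers)
score = tfidf_answer_score("A process is a running program", weights, min_reference_tfidf, marks=10)
print(round(score, 2))  # a value between 0 and 10
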
templates/index.html ADDED
@@ -0,0 +1,367 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Answer Generation</title>
7
+ <!-- Add Google Fonts -->
8
+ <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600&display=swap" rel="stylesheet">
9
+ <style>
10
+ :root {
11
+ --primary-color: #4361ee;
12
+ --secondary-color: #3f37c9;
13
+ --accent-color: #4895ef;
14
+ --background-color: #f8f9fa;
15
+ --text-color: #2b2d42;
16
+ --border-radius: 8px;
17
+ --box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
18
+ }
19
+
20
+ body {
21
+ font-family: 'Poppins', sans-serif;
22
+ margin: 0;
23
+ padding: 2rem;
24
+ background-color: var(--background-color);
25
+ color: var(--text-color);
26
+ line-height: 1.6;
27
+ }
28
+
29
+ .container {
30
+ max-width: 1200px;
31
+ margin: 0 auto;
32
+ padding: 2rem;
33
+ background: white;
34
+ border-radius: var(--border-radius);
35
+ box-shadow: var(--box-shadow);
36
+ }
37
+
38
+ h2 {
39
+ color: var(--primary-color);
40
+ margin-bottom: 1.5rem;
41
+ font-weight: 600;
42
+ position: relative;
43
+ padding-bottom: 0.5rem;
44
+ }
45
+
46
+ h2::after {
47
+ content: '';
48
+ position: absolute;
49
+ bottom: 0;
50
+ left: 0;
51
+ width: 50px;
52
+ height: 3px;
53
+ background-color: var(--accent-color);
54
+ border-radius: 2px;
55
+ }
56
+
57
+ .section {
58
+ background: white;
59
+ padding: 1.5rem;
60
+ border-radius: var(--border-radius);
61
+ margin-bottom: 2rem;
62
+ box-shadow: var(--box-shadow);
63
+ }
64
+
65
+ .upload-container {
66
+ margin-bottom: 1.5rem;
67
+ }
68
+
69
+ label {
70
+ display: block;
71
+ margin-bottom: 0.5rem;
72
+ font-weight: 500;
73
+ color: var(--text-color);
74
+ }
75
+
76
+ input[type="file"] {
77
+ width: 100%;
78
+ padding: 0.5rem;
79
+ margin-bottom: 1rem;
80
+ border: 2px dashed var(--accent-color);
81
+ border-radius: var(--border-radius);
82
+ background: #f8f9fa;
83
+ cursor: pointer;
84
+ }
85
+
86
+ input[type="file"]:hover {
87
+ border-color: var(--primary-color);
88
+ }
89
+
90
+ select {
91
+ width: 100%;
92
+ padding: 0.8rem;
93
+ border: 1px solid #ddd;
94
+ border-radius: var(--border-radius);
95
+ margin-bottom: 1rem;
96
+ font-family: 'Poppins', sans-serif;
97
+ appearance: none;
98
+ background: white url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='16' height='16' fill='%23444' viewBox='0 0 16 16'%3E%3Cpath d='M8 12L2 6h12z'/%3E%3C/svg%3E") no-repeat right 0.8rem center;
99
+ }
100
+
101
+ button {
102
+ background-color: var(--primary-color);
103
+ color: white;
104
+ border: none;
105
+ padding: 0.8rem 1.5rem;
106
+ border-radius: var(--border-radius);
107
+ cursor: pointer;
108
+ font-weight: 500;
109
+ transition: all 0.3s ease;
110
+ font-family: 'Poppins', sans-serif;
111
+ width: 100%;
112
+ margin-bottom: 1rem;
113
+ }
114
+
115
+ button:hover {
116
+ background-color: var(--secondary-color);
117
+ transform: translateY(-2px);
118
+ box-shadow: 0 4px 12px rgba(67, 97, 238, 0.3);
119
+ }
120
+
121
+ .answer-box {
122
+ width: 100%;
123
+ min-height: 100px;
124
+ padding: 1rem;
125
+ margin-bottom: 1rem;
126
+ border: 1px solid #ddd;
127
+ border-radius: var(--border-radius);
128
+ font-family: 'Poppins', sans-serif;
129
+ resize: vertical;
130
+ transition: border-color 0.3s ease;
131
+ }
132
+
133
+ .answer-box:focus {
134
+ outline: none;
135
+ border-color: var(--accent-color);
136
+ box-shadow: 0 0 0 3px rgba(72, 149, 239, 0.2);
137
+ }
138
+
139
+ table {
140
+ width: 100%;
141
+ border-collapse: separate;
142
+ border-spacing: 0;
143
+ margin-top: 1.5rem;
144
+ background: white;
145
+ border-radius: var(--border-radius);
146
+ overflow: hidden;
147
+ box-shadow: var(--box-shadow);
148
+ }
149
+
150
+ th, td {
151
+ padding: 1rem;
152
+ text-align: left;
153
+ border-bottom: 1px solid #eee;
154
+ }
155
+
156
+ th {
157
+ background-color: var(--primary-color);
158
+ color: white;
159
+ font-weight: 500;
160
+ }
161
+
162
+ tr:hover {
163
+ background-color: #f8f9fa;
164
+ }
165
+
166
+ .hidden {
167
+ display: none;
168
+ }
169
+
170
+ /* Responsive Design */
171
+ @media (max-width: 768px) {
172
+ body {
173
+ padding: 1rem;
174
+ }
175
+
176
+ .container {
177
+ padding: 1rem;
178
+ }
179
+
180
+ button {
181
+ padding: 0.7rem 1rem;
182
+ }
183
+ }
184
+
185
+ /* Animation */
186
+ @keyframes fadeIn {
187
+ from { opacity: 0; transform: translateY(10px); }
188
+ to { opacity: 1; transform: translateY(0); }
189
+ }
190
+
191
+ .section {
192
+ animation: fadeIn 0.5s ease-out;
193
+ }
194
+ </style>
195
+ </head>
196
+ <body>
197
+ <div class="container">
198
+ <div class="section">
199
+ <h2>Upload Query CSV File</h2>
200
+ <div id="query-upload">
201
+ <label for="query-file">Query File:</label>
202
+ <div class="upload-container">
203
+ <input type="file" id="query-file" accept=".csv">
204
+ </div>
205
+ </div>
206
+ </div>
207
+
208
+ <div class="section">
209
+ <h2>Answer Generation</h2>
210
+ <label for="file-type">Select File Type:</label>
211
+ <select id="file-type" onchange="handleFileTypeChange()">
212
+ <option value="pdf">PDF</option>
213
+ <option value="csv">CSV</option>
214
+ </select>
215
+
216
+ <div id="csv-upload" class="hidden">
217
+ <label for="csv-file">Upload Answer CSV File:</label>
218
+ <div class="upload-container">
219
+ <input type="file" id="csv-file" accept=".csv">
220
+ </div>
221
+ </div>
222
+
223
+ <button id="compute-btn" onclick="computeAnswers()">Compute Answers</button>
224
+ </div>
225
+
226
+ <div class="section">
227
+ <h2>Student Answers Upload</h2>
228
+ <label for="folder-upload">Upload Student Answers Folder:</label>
229
+ <div class="upload-container">
230
+ <input type="file" id="folder-upload" webkitdirectory directory multiple>
231
+ <small class="help-text">Select the folder containing student answer images</small>
232
+ </div>
233
+ </div>
234
+
235
+ <div class="section">
236
+ <div id="answers-container"></div>
237
+ <button id="compute-marks-btn" onclick="computeMarks()">Compute Marks</button>
238
+ <div id="marks-table-container"></div>
239
+ </div>
240
+ </div>
241
+
242
+ <script>
243
+ function handleFileTypeChange() {
244
+ const fileType = document.getElementById('file-type').value;
245
+ const csvUpload = document.getElementById('csv-upload');
246
+ if (fileType === 'csv') {
247
+ csvUpload.classList.remove('hidden'); // Show the CSV upload section
248
+ } else {
249
+ csvUpload.classList.add('hidden'); // Hide the CSV upload section
250
+ }
251
+ }
252
+
253
+ async function computeAnswers() {
254
+ try {
255
+ const fileType = document.getElementById('file-type').value;
256
+ const queryfile = document.getElementById('query-file').files[0];
257
+ const anscsvFile = document.getElementById('csv-file').files[0];
258
+
259
+ const formData = new FormData();
260
+ formData.append('file_type', fileType);
261
+ formData.append('query_file', queryfile);
262
+ if (anscsvFile) {
263
+ formData.append('ans_csv_file', anscsvFile);
264
+ }
265
+
266
+ const response = await fetch('/compute_answers', { method: 'POST', body: formData });
267
+ const result = await response.json();
268
+ if (result.answers) {
269
+ displayAnswers(result.answers);
270
+ } else {
271
+ console.error('No answers received:', result);
272
+ }
273
+ } catch (error) {
274
+ console.error('Error:', error);
275
+ }
276
+ }
277
+
278
+
279
+ function displayAnswers(answers) {
280
+ const container = document.getElementById('answers-container');
281
+ container.innerHTML = ''; // Clear previous answers
282
+
283
+ answers.forEach(answer => {
284
+ const textBox = document.createElement('textarea');
285
+ textBox.className = 'answer-box';
286
+ textBox.value = answer.join('\n\n'); // Set the answer as the value of the text box
287
+ container.appendChild(textBox);
288
+ });
289
+ }
290
+
291
+ async function computeMarks() {
292
+ try {
293
+ const answerBoxes = document.querySelectorAll('.answer-box');
294
+ const answers = answerBoxes.length === 1 ? [answerBoxes[0].value.trim()] : Array.from(answerBoxes).map(box => box.value.trim());
295
+
296
+ // Create FormData and append answers
297
+ const formData = new FormData();
298
+ formData.append('answers', JSON.stringify(answers));
299
+
300
+ // Handle folder upload
301
+ const folderInput = document.getElementById('folder-upload');
302
+ const files = folderInput.files;
303
+
304
+ // Append each file with its relative path
305
+ for (let i = 0; i < files.length; i++) {
306
+ const file = files[i];
307
+ const relativePath = file.webkitRelativePath;
308
+ formData.append('files[]', file, relativePath);
309
+ }
310
+
311
+ const response = await fetch('/compute_marks', {
312
+ method: 'POST',
313
+ body: formData
314
+ });
315
+ const result = await response.json();
316
+
317
+ if (result) {
318
+ displayMarksTable(result);
319
+ } else {
320
+ console.error('No marks data received:', result);
321
+ }
322
+ } catch (error) {
323
+ console.error('Error:', error);
324
+ }
325
+ }
326
+
327
+ function displayMarksTable(data) {
328
+ // Access the 'message' property which contains the actual marks data
329
+ const marksData = data.message;
330
+
331
+ console.log('Marks Data Received:', marksData);
332
+
333
+ const container = document.getElementById('marks-table-container');
334
+ container.innerHTML = ''; // Clear previous table
335
+
336
+ // Create table
337
+ const table = document.createElement('table');
338
+ table.innerHTML = `
339
+ <tr>
340
+ <th>Name</th>
341
+ <th>Question Number</th>
342
+ <th>Marks</th>
343
+ </tr>
344
+ `;
345
+
346
+ for (const [name, marks] of Object.entries(marksData)) {
347
+ if (!Array.isArray(marks)) {
348
+ console.error('Invalid marks for ${name}:', marks);
349
+ continue; // Skip invalid entries
350
+ }
351
+
352
+ marks.forEach((mark, index) => {
353
+ const row = document.createElement('tr');
354
+ row.innerHTML = `
355
+ <td>${name}</td>
356
+ <td>${index + 1}</td>
357
+ <td>${mark.toFixed(2)}</td>
358
+ `;
359
+ table.appendChild(row);
360
+ });
361
+ }
362
+
363
+ container.appendChild(table);
364
+ }
365
+ </script>
366
+ </body>
367
+ </html>