Commit 2d51954 · 1 Parent(s): 372999e
printing extracted text
main.py CHANGED
@@ -536,29 +536,41 @@ def compute_marks():
         count = 0
         for filename, image_path in sorted_images:
             try:
+                # Extract text from image
                 s_answer = extract_text_from_image(image_path)
-                logger.info(f"
+                logger.info(f"Processing student: {student}, image: {filename}")
+                logger.info(f"Extracted text: {s_answer}")
 
                 if not s_answer:
                     logger.warning(f"No text extracted from {image_path}")
                     results.append({
                         "subfolder": student,
                         "image": filename,
-                        "marks": 0
+                        "marks": 0,
+                        "extracted_text": "",
+                        "error": "No text could be extracted from image"
                     })
                     count += 1
                     continue
 
+                # Calculate TF-IDF scores
                 tf_idf_word_values, max_tfidf = create_tfidf_values(answers[count])
+                logger.info(f"TF-IDF max value: {max_tfidf}")
+
+                # Calculate marks
                 m = marks(s_answer, sen_vec_answers[count], word_vec_answers[count],
                           tf_idf_word_values, max_tfidf, answers[count])
 
                 if isinstance(m, torch.Tensor):
                     m = m.item()
+
+                # Add result with extracted text
                 results.append({
                     "subfolder": student,
                     "image": filename,
-                    "marks": round(m, 2)
+                    "marks": round(m, 2),
+                    "extracted_text": s_answer,
+                    "correct_answer": answers[count]
                 })
                 count += 1
 
@@ -570,7 +582,9 @@ def compute_marks():
                 results.append({
                     "subfolder": student,
                     "image": filename,
-                    "marks": 0
+                    "marks": 0,
+                    "extracted_text": "",
+                    "error": str(e)
                 })
                 count += 1
                 continue
@@ -586,10 +600,18 @@ def compute_marks():
         # Final memory cleanup
         cleanup_memory()
 
-        return jsonify({
+        return jsonify({
+            "results": results,
+            "debug_info": {
+                "total_students": len(data),
+                "total_answers": len(answers),
+                "answers_processed": count
+            }
+        }), 200
 
     except Exception as e:
         error_msg = str(e)
+        logger.error(f"Error in compute_marks: {error_msg}")
         notification_queue.put({
             "type": "error",
             "message": error_msg
@@ -602,41 +624,65 @@ def compute_marks():
 def marks(answer, sen_vec_answers, word_vec_answers, tf_idf_word_values, max_tfidf, correct_answers):
     try:
         marks = 0
+        logger.info(f"Starting marks calculation for answer: {answer}")
+        logger.info(f"Correct answers: {correct_answers}")
+
+        # Calculate TF-IDF score
         marks1 = tfidf_answer_score(answer, tf_idf_word_values, max_tfidf, marks=10)
+        logger.info(f"Initial TF-IDF score: {marks1}")
 
         if marks1 > 3:
-
-
+            tfidf_contribution = new_value(marks1, old_min=3, old_max=10, new_min=0, new_max=5)
+            marks += tfidf_contribution
+            logger.info(f"TF-IDF contribution (>3): {tfidf_contribution}")
 
         if marks1 > 2:
+            # Calculate sentence transformer score
            marks2 = similarity_model_score(sen_vec_answers, answer)
+            logger.info(f"Sentence transformer raw score: {marks2}")
+
            a = 0
            if marks2 > 0.95:
                marks += 3
-                a =
+                a = 3
+                logger.info("High sentence similarity (>0.95): +3 marks")
            elif marks2 > 0.5:
-
-
-
+                sentence_contribution = new_value(marks2, old_min=0.5, old_max=0.95, new_min=0, new_max=3)
+                marks += sentence_contribution
+                a = sentence_contribution
+                logger.info(f"Medium sentence similarity (>0.5): +{sentence_contribution} marks")
 
+            # Calculate FastText similarity
            marks3 = fasttext_similarity(word_vec_answers, answer)
+            logger.info(f"FastText similarity raw score: {marks3}")
+
            b = 0
            if marks2 > 0.9:
                marks += 2
-                b =
+                b = 2
+                logger.info("High word similarity (>0.9): +2 marks")
            elif marks3 > 0.4:
-
-
-
+                word_contribution = new_value(marks3, old_min=0.4, old_max=0.9, new_min=0, new_max=2)
+                marks += word_contribution
+                b = word_contribution
+                logger.info(f"Medium word similarity (>0.4): +{word_contribution} marks")
 
+            # Calculate LLM score
            marks4 = llm_score(correct_answers, answer)
+            logger.info(f"Raw LLM scores: {marks4}")
+
            for i in range(len(marks4)):
                marks4[i] = float(marks4[i])
 
            m = max(marks4)
-            logger.info(f"LLM score: {m
-
+            logger.info(f"Max LLM score: {m}")
+
+            # Final score calculation
+            final_score = marks/2 + m/2
+            logger.info(f"Final score calculation: (marks={marks}/2 + llm={m}/2) = {final_score}")
+            marks = final_score
 
+        logger.info(f"Final marks awarded: {marks}")
        return marks
 
    except Exception as e:
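For context on the added scoring path: marks() rescales each signal with a new_value() helper that is called in this commit but not defined in the diff, then averages the heuristic total with the best LLM grade. The sketch below is a minimal, hypothetical reconstruction, assuming new_value() is a plain linear rescale from [old_min, old_max] to [new_min, new_max]; blend_scores() is an illustrative stand-in name, and the thresholds and the marks/2 + m/2 blend simply mirror the added lines above rather than the project's real tfidf_answer_score, similarity_model_score, fasttext_similarity, or llm_score implementations.

# Hypothetical sketch of the rescaling and blending used by marks().
# new_value() is assumed to be a plain linear rescale; the real helper
# in main.py may clamp or round differently.

def new_value(x, old_min, old_max, new_min, new_max):
    # Map x from [old_min, old_max] onto [new_min, new_max] linearly.
    return new_min + (x - old_min) * (new_max - new_min) / (old_max - old_min)

def blend_scores(marks1, marks2, marks3, llm_scores):
    """Recreate the staged scoring from the diff with plain numbers.

    marks1: TF-IDF score on a 0-10 scale
    marks2: sentence-transformer similarity in [0, 1]
    marks3: fastText similarity in [0, 1]
    llm_scores: list of LLM grades on the final-mark scale
    """
    marks = 0
    if marks1 > 3:
        marks += new_value(marks1, old_min=3, old_max=10, new_min=0, new_max=5)

    if marks1 > 2:
        if marks2 > 0.95:
            marks += 3
        elif marks2 > 0.5:
            marks += new_value(marks2, old_min=0.5, old_max=0.95, new_min=0, new_max=3)

        # Mirrors the diff: the +2 branch keys off marks2, then falls back to marks3.
        if marks2 > 0.9:
            marks += 2
        elif marks3 > 0.4:
            marks += new_value(marks3, old_min=0.4, old_max=0.9, new_min=0, new_max=2)

        # Average the heuristic total with the best LLM grade.
        m = max(float(s) for s in llm_scores)
        marks = marks / 2 + m / 2

    return marks

# Example: strong TF-IDF (8/10), high sentence similarity (0.96),
# moderate word similarity (0.6), LLM grades of [7, 8].
print(round(blend_scores(8, 0.96, 0.6, [7, 8]), 2))

With those example inputs the sketch prints 8.29, the kind of value that compute_marks() then stores via round(m, 2).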