yamanavijayavardhan committed on
Commit
2d51954
·
1 Parent(s): 372999e

printing extracted text

Browse files
Files changed (1) hide show
  1. main.py +63 -17
main.py CHANGED
@@ -536,29 +536,41 @@ def compute_marks():
536
  count = 0
537
  for filename, image_path in sorted_images:
538
  try:
 
539
  s_answer = extract_text_from_image(image_path)
540
- logger.info(f"Extracted text from {image_path}: {s_answer}")
 
541
 
542
  if not s_answer:
543
  logger.warning(f"No text extracted from {image_path}")
544
  results.append({
545
  "subfolder": student,
546
  "image": filename,
547
- "marks": 0
 
 
548
  })
549
  count += 1
550
  continue
551
 
 
552
  tf_idf_word_values, max_tfidf = create_tfidf_values(answers[count])
 
 
 
553
  m = marks(s_answer, sen_vec_answers[count], word_vec_answers[count],
554
  tf_idf_word_values, max_tfidf, answers[count])
555
 
556
  if isinstance(m, torch.Tensor):
557
  m = m.item()
 
 
558
  results.append({
559
  "subfolder": student,
560
  "image": filename,
561
- "marks": round(m, 2) # Round marks to 2 decimal places
 
 
562
  })
563
  count += 1
564
 
@@ -570,7 +582,9 @@ def compute_marks():
570
  results.append({
571
  "subfolder": student,
572
  "image": filename,
573
- "marks": 0
 
 
574
  })
575
  count += 1
576
  continue
@@ -586,10 +600,18 @@ def compute_marks():
586
  # Final memory cleanup
587
  cleanup_memory()
588
 
589
- return jsonify({"results": results}), 200
 
 
 
 
 
 
 
590
 
591
  except Exception as e:
592
  error_msg = str(e)
 
593
  notification_queue.put({
594
  "type": "error",
595
  "message": error_msg
@@ -602,41 +624,65 @@ def compute_marks():
602
  def marks(answer, sen_vec_answers, word_vec_answers, tf_idf_word_values, max_tfidf, correct_answers):
603
  try:
604
  marks = 0
 
 
 
 
605
  marks1 = tfidf_answer_score(answer, tf_idf_word_values, max_tfidf, marks=10)
 
606
 
607
  if marks1 > 3:
608
- marks += new_value(marks1, old_min=3, old_max=10, new_min=0, new_max=5)
609
- logger.info(f"TFIDF Score: {float(marks)}")
 
610
 
611
  if marks1 > 2:
 
612
  marks2 = similarity_model_score(sen_vec_answers, answer)
 
 
613
  a = 0
614
  if marks2 > 0.95:
615
  marks += 3
616
- a = a + 3
 
617
  elif marks2 > 0.5:
618
- marks += new_value(marks2, old_min=0.5, old_max=0.95, new_min=0, new_max=3)
619
- a = a + new_value(marks2, old_min=0.5, old_max=0.95, new_min=0, new_max=3)
620
- logger.info(f"Sentence transformers score: {a}")
 
621
 
 
622
  marks3 = fasttext_similarity(word_vec_answers, answer)
 
 
623
  b = 0
624
  if marks2 > 0.9:
625
  marks += 2
626
- b = b + 2
 
627
  elif marks3 > 0.4:
628
- marks += new_value(marks3, old_min=0.4, old_max=0.9, new_min=0, new_max=2)
629
- b = b + new_value(marks3, old_min=0.4, old_max=0.9, new_min=0, new_max=2)
630
- logger.info(f"Fasttext score: {b}")
 
631
 
 
632
  marks4 = llm_score(correct_answers, answer)
 
 
633
  for i in range(len(marks4)):
634
  marks4[i] = float(marks4[i])
635
 
636
  m = max(marks4)
637
- logger.info(f"LLM score: {m/2}")
638
- marks = marks/2 + m/2
 
 
 
 
639
 
 
640
  return marks
641
 
642
  except Exception as e:
 
536
  count = 0
537
  for filename, image_path in sorted_images:
538
  try:
539
+ # Extract text from image
540
  s_answer = extract_text_from_image(image_path)
541
+ logger.info(f"Processing student: {student}, image: {filename}")
542
+ logger.info(f"Extracted text: {s_answer}")
543
 
544
  if not s_answer:
545
  logger.warning(f"No text extracted from {image_path}")
546
  results.append({
547
  "subfolder": student,
548
  "image": filename,
549
+ "marks": 0,
550
+ "extracted_text": "",
551
+ "error": "No text could be extracted from image"
552
  })
553
  count += 1
554
  continue
555
 
556
+ # Calculate TF-IDF scores
557
  tf_idf_word_values, max_tfidf = create_tfidf_values(answers[count])
558
+ logger.info(f"TF-IDF max value: {max_tfidf}")
559
+
560
+ # Calculate marks
561
  m = marks(s_answer, sen_vec_answers[count], word_vec_answers[count],
562
  tf_idf_word_values, max_tfidf, answers[count])
563
 
564
  if isinstance(m, torch.Tensor):
565
  m = m.item()
566
+
567
+ # Add result with extracted text
568
  results.append({
569
  "subfolder": student,
570
  "image": filename,
571
+ "marks": round(m, 2),
572
+ "extracted_text": s_answer,
573
+ "correct_answer": answers[count]
574
  })
575
  count += 1
576
 
 
582
  results.append({
583
  "subfolder": student,
584
  "image": filename,
585
+ "marks": 0,
586
+ "extracted_text": "",
587
+ "error": str(e)
588
  })
589
  count += 1
590
  continue
 
600
  # Final memory cleanup
601
  cleanup_memory()
602
 
603
+ return jsonify({
604
+ "results": results,
605
+ "debug_info": {
606
+ "total_students": len(data),
607
+ "total_answers": len(answers),
608
+ "answers_processed": count
609
+ }
610
+ }), 200
611
 
612
  except Exception as e:
613
  error_msg = str(e)
614
+ logger.error(f"Error in compute_marks: {error_msg}")
615
  notification_queue.put({
616
  "type": "error",
617
  "message": error_msg
 
624
  def marks(answer, sen_vec_answers, word_vec_answers, tf_idf_word_values, max_tfidf, correct_answers):
625
  try:
626
  marks = 0
627
+ logger.info(f"Starting marks calculation for answer: {answer}")
628
+ logger.info(f"Correct answers: {correct_answers}")
629
+
630
+ # Calculate TF-IDF score
631
  marks1 = tfidf_answer_score(answer, tf_idf_word_values, max_tfidf, marks=10)
632
+ logger.info(f"Initial TF-IDF score: {marks1}")
633
 
634
  if marks1 > 3:
635
+ tfidf_contribution = new_value(marks1, old_min=3, old_max=10, new_min=0, new_max=5)
636
+ marks += tfidf_contribution
637
+ logger.info(f"TF-IDF contribution (>3): {tfidf_contribution}")
638
 
639
  if marks1 > 2:
640
+ # Calculate sentence transformer score
641
  marks2 = similarity_model_score(sen_vec_answers, answer)
642
+ logger.info(f"Sentence transformer raw score: {marks2}")
643
+
644
  a = 0
645
  if marks2 > 0.95:
646
  marks += 3
647
+ a = 3
648
+ logger.info("High sentence similarity (>0.95): +3 marks")
649
  elif marks2 > 0.5:
650
+ sentence_contribution = new_value(marks2, old_min=0.5, old_max=0.95, new_min=0, new_max=3)
651
+ marks += sentence_contribution
652
+ a = sentence_contribution
653
+ logger.info(f"Medium sentence similarity (>0.5): +{sentence_contribution} marks")
654
 
655
+ # Calculate FastText similarity
656
  marks3 = fasttext_similarity(word_vec_answers, answer)
657
+ logger.info(f"FastText similarity raw score: {marks3}")
658
+
659
  b = 0
660
  if marks2 > 0.9:
661
  marks += 2
662
+ b = 2
663
+ logger.info("High word similarity (>0.9): +2 marks")
664
  elif marks3 > 0.4:
665
+ word_contribution = new_value(marks3, old_min=0.4, old_max=0.9, new_min=0, new_max=2)
666
+ marks += word_contribution
667
+ b = word_contribution
668
+ logger.info(f"Medium word similarity (>0.4): +{word_contribution} marks")
669
 
670
+ # Calculate LLM score
671
  marks4 = llm_score(correct_answers, answer)
672
+ logger.info(f"Raw LLM scores: {marks4}")
673
+
674
  for i in range(len(marks4)):
675
  marks4[i] = float(marks4[i])
676
 
677
  m = max(marks4)
678
+ logger.info(f"Max LLM score: {m}")
679
+
680
+ # Final score calculation
681
+ final_score = marks/2 + m/2
682
+ logger.info(f"Final score calculation: (marks={marks}/2 + llm={m}/2) = {final_score}")
683
+ marks = final_score
684
 
685
+ logger.info(f"Final marks awarded: {marks}")
686
  return marks
687
 
688
  except Exception as e: