kerols77 committed
Commit 40cda9e · verified · 1 Parent(s): 90c8539

Upload 3 files

Files changed (3)
  1. app.py +431 -0
  2. data/advice.csv +0 -0
  3. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,431 @@
+ import re
+ import cv2
+ import spacy
+ import numpy as np
+ import os
+ import string
+ import csv
+ import random
+ import json
+ import requests
+ from collections import OrderedDict
+ from flask import Flask, request, Response
+ from paddleocr import PaddleOCR
+ from sentence_transformers import SentenceTransformer, util
+ from transformers import pipeline
+
+ # Ensure the language model is available
+ try:
+     import en_core_web_md
+ except ImportError:
+     print("en_core_web_md not found. Downloading now...")
+     import spacy.cli
+     spacy.cli.download("en_core_web_md")
+     import en_core_web_md
+
+ # Load the spaCy model
+ nlp = en_core_web_md.load()
+
+ # Initialize other components
+ ochr = PaddleOCR(use_angle_cls=True, lang='en')
+ sbert_model = SentenceTransformer("all-mpnet-base-v2")
+ entailment_classifier = pipeline(
+     "text-classification",
+     model="roberta-large-mnli",
+     return_all_scores=True
+ )
+
+ app = Flask(__name__)
+
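+ # Zero-shot classification of a question into one of the candidate school subjects.
+ # Note: a fresh bart-large-mnli pipeline is built on every call, which is slow; it
+ # could be loaded once at module level instead.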
+ def classify_subject(question, candidate_labels=None):
+     if candidate_labels is None:
+         candidate_labels = ["Math", "Science", "History", "Literature", "Geography", "Art"]
+     classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+     result = classifier(question, candidate_labels)
+     return result["labels"][0]
+
+ def load_advice(filename):
+     advice_list = []
+     try:
+         with open(filename, newline='', encoding='utf-8') as csvfile:
+             reader = csv.DictReader(csvfile)
+             for row in reader:
+                 advice_list.append({
+                     "min_score": float(row["min_score"]),
+                     "max_score": float(row["max_score"]),
+                     "subject": row["subject"],
+                     "advice_parent": row["advice_parent"],
+                     "advice_teacher": row["advice_teacher"],
+                     "study_plan": row["study_plan"],
+                     "recommended_books": row["recommended_books"]
+                 })
+     except Exception as e:
+         print("Advice file error:", e)
+     return advice_list
+
+ def get_advice(score, subject, advice_list):
+     filtered = [a for a in advice_list
+                 if a["subject"].lower() == subject.lower()
+                 and a["min_score"] <= score <= a["max_score"]]
+     if filtered:
+         return random.choice(filtered)
+     return {
+         "advice_parent": "No parent advice available.",
+         "advice_teacher": "No teacher advice available.",
+         "study_plan": "No study plan available.",
+         "recommended_books": "No books available."
+     }
+
+ def ocr_from_array(image):
+     image = np.ascontiguousarray(image)
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+     result = ochr.ocr(gray, cls=True)
+     return "\n".join([line[1][0] for line in result[0]])
+
+ def preprocess_text(text):
+     return " ".join(
+         token.lemma_ for token in nlp(text.lower())
+         if not token.is_stop and not token.is_punct
+     )
+
+ def text_to_vector_sbert(text):
+     return sbert_model.encode(text, convert_to_tensor=True)
+
+ def compute_similarity(text1, text2):
+     return util.pytorch_cos_sim(
+         text_to_vector_sbert(text1),
+         text_to_vector_sbert(text2)
+     ).item()
+
+ def contains_keyword(reference, student):
+     tr = str.maketrans('', '', string.punctuation)
+     return bool(
+         set(reference.lower().translate(tr).split()) &
+         set(student.lower().translate(tr).split())
+     )
+
+ def check_entailment(student, reference):
+     scores = entailment_classifier(f"{student} </s></s> {reference}", truncation=True)
+     for item in scores[0]:
+         if item["label"] == "ENTAILMENT":
+             return item["score"]
+     return 0.0
+
+ def entity_match(ref_ans, stud_ans):
+     return bool({ent.text.lower() for ent in nlp(ref_ans).ents} &
+                 {ent.text.lower() for ent in nlp(stud_ans).ents})
+
+ def extract_numbers(text):
+     nums = set(re.findall(r'\d+', text))
+     words = {"zero": "0", "one": "1", "two": "2", "three": "3",
+              "four": "4", "five": "5", "six": "6", "seven": "7",
+              "eight": "8", "nine": "9", "ten": "10"}
+     for w in text.lower().split():
+         tok = w.strip(string.punctuation)
+         if tok in words:
+             nums.add(words[tok])
+     return nums
+
+ def is_year(text):
+     clean = text.strip().replace(".", "")
+     years = re.findall(r'\d{4}', clean)
+     return len(years) == 1 and re.sub(r'\d{4}', '', clean).strip(string.punctuation + " ") == ""
+
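+ # Heuristic grading: exact matches, very short answers sharing a keyword, shared
+ # numbers, or strong entailment (> 0.9) get full marks; entity or keyword overlap is
+ # floored at threshold * max_grade; otherwise the grade falls back to
+ # similarity * max_grade. Year-style references are compared digit-for-digit, and
+ # answers marked Correct but shorter than the reference are scaled down by word count.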
+ def advanced_grade(ref_ans, stud_ans, similarity, threshold=0.8, max_grade=100):
+     min_corr, min_inc = 50, 30
+     tr = str.maketrans('', '', string.punctuation)
+     r = ref_ans.lower().translate(tr).strip()
+     s = stud_ans.lower().translate(tr).strip()
+     base = similarity * max_grade
+     if is_year(ref_ans):
+         ref_years = re.findall(r'\d{4}', ref_ans)
+         stud_years = re.findall(r'\d{4}', stud_ans)
+         if not stud_years or ref_years[0] != stud_years[0]:
+             grade = min_inc if contains_keyword(ref_ans, stud_ans) else 0
+             mark = "Incorrect"
+         else:
+             grade, mark = max_grade, "Correct"
+     elif r == s or (len(s.split()) <= 3 and contains_keyword(ref_ans, stud_ans)) or \
+             (extract_numbers(stud_ans) & extract_numbers(ref_ans)) or \
+             check_entailment(stud_ans, ref_ans) > 0.9:
+         grade, mark = max_grade, "Correct"
+     elif entity_match(ref_ans, stud_ans) or (contains_keyword(ref_ans, stud_ans) and similarity < threshold):
+         grade = max(base, threshold * max_grade)
+         mark = "Correct"
+     elif contains_keyword(ref_ans, stud_ans) or similarity >= threshold:
+         grade = min(base + 10, max_grade)
+         mark = "Correct"
+     else:
+         grade = max(base, min_inc) if contains_keyword(ref_ans, stud_ans) else base
+         mark = "Incorrect"
+     if mark == "Correct":
+         rw, sw = len(ref_ans.split()), len(stud_ans.split())
+         if rw > 0 and sw < rw:
+             grade = max(min_corr, grade * (sw / rw))
+     return grade, mark
+
+ def correct_token(token):
+     rep = {'o':'0','O':'0','l':'1','I':'1','|':'1','z':'2','Z':'2',
+            'e':'3','E':'3','a':'4','A':'4','y':'4','Y':'4','s':'5','S':'5',
+            'g':'6','G':'6','t':'7','T':'7','b':'8','B':'8','q':'9','Q':'9'}
+     return ''.join(rep.get(c, c) for c in token)
+
+ def fix_question_prefix(line):
+     if not line:
+         return line
+     first, rest = line[0], line[1:]
+     mapping = {'I': '1', 'l': '1', '|': '1', 'S': '5', 's': '5'}
+     if first in mapping and rest and rest[0] in ".- )":
+         return mapping[first] + rest
+     return line
+
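+ # Parse OCR text of the reference sheet. Expected layout per question:
+ #   "<number>. <question text>" followed by "Answer: <reference answer>".
+ # Continuation lines are appended to the current question or answer, and a trailing
+ # "?" is added to any question that lacks one.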
+ def parse_reference_answers(text):
+     ref_dict = {}
+     lines = text.splitlines()
+     current_question = None
+     question_text = ""
+     answer_text = ""
+     i = 0
+     while i < len(lines):
+         raw_line = lines[i]
+         line = fix_question_prefix(raw_line).strip()
+         if not line:
+             i += 1
+             continue
+         q_match = re.match(r'^(\d+)[\s\.\-]+(.+)', line)
+         if q_match:
+             if current_question is not None and question_text:
+                 ref_dict[current_question] = {"question": question_text.strip(), "answer": answer_text.strip()}
+             current_question = int(q_match.group(1))
+             question_text = q_match.group(2).strip()
+             answer_text = ""
+             if i + 1 < len(lines) and "answer" in lines[i+1].lower():
+                 answer_line = fix_question_prefix(lines[i+1]).strip()
+                 answer_match = re.match(r'^answer:?[ \t]*(.+)', answer_line, re.IGNORECASE)
+                 if answer_match:
+                     answer_text = answer_match.group(1).strip()
+                     i += 2
+                     continue
+             i += 1
+             continue
+         a_match = re.match(r'^answer:?[ \t]*(.+)', line, re.IGNORECASE)
+         if a_match and current_question is not None:
+             answer_text = a_match.group(1).strip()
+             i += 1
+             continue
+         if current_question is not None:
+             if not answer_text:
+                 question_text += " " + line
+             else:
+                 answer_text += " " + line
+         i += 1
+     if current_question is not None and question_text:
+         ref_dict[current_question] = {"question": question_text.strip(), "answer": answer_text.strip()}
+     for q in ref_dict:
+         if not ref_dict[q]["question"].strip().endswith('?'):
+             ref_dict[q]["question"] += '?'
+     return ref_dict
+
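+ # Parse OCR text of the student sheet: each non-empty line of the form
+ # "<number>. <answer>" (or "<number>) <answer>") is mapped to that question number.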
+ def parse_student_answers(text):
+     stud_dict = {}
+     lines = text.splitlines()
+     for line in lines:
+         line = line.strip()
+         if not line:
+             continue
+         match = re.match(r'^(\d+)[\s\.\-]+(.+)', line)
+         if match:
+             stud_dict[int(match.group(1))] = match.group(2).strip()
+             continue
+         match = re.match(r'^(\d+)[\.|\)][\s]*(.+)', line)
+         if match:
+             stud_dict[int(match.group(1))] = match.group(2).strip()
+     return stud_dict
+
+ def print_parsed_answers(ref_dict, stud_dict):
+     print("\n" + "="*80)
+     print("PARSED QUESTIONS AND ANSWERS".center(80))
+     print("="*80)
+     for q in sorted(ref_dict.keys()):
+         print(f"\nQuestion {q}:")
+         print(f" Question text: {ref_dict[q]['question']}")
+         print(f" Reference answer: {ref_dict[q]['answer']}")
+         print(f" Student answer: {stud_dict.get(q, 'No answer provided')}")
+     print("\nMissing reference questions:", set(stud_dict.keys()) - set(ref_dict.keys()))
+     print("Missing student answers:", set(ref_dict.keys()) - set(stud_dict.keys()))
+     print("="*80 + "\n")
+
+ def display_results_in_terminal(results, mcq_results=None):
+     print("\n" + "="*80)
+     print("GRADING DETAILS".center(80))
+     print("="*80)
+     if results:
+         print("\nFREE-TEXT ANSWERS GRADING:\n")
+         for r in results:
+             print(f"Question {r['Question Number']}:")
+             print(f" Subject: {r['Subject']}")
+             print(f" Similarity Score: {r['Similarity']:.2f}")
+             print(f" Grade: {r['Grade']:.1f}")
+             print(f" Mark: {r['Mark']}")
+             print("-"*70)
+     if mcq_results:
+         print("\nMCQ ANSWERS GRADING:\n")
+         print(f"Correct Questions: {mcq_results['Correct Questions']}")
+         print(f"Incorrect Questions: {mcq_results['Incorrect Questions']}")
+         print(f"Total Grade: {mcq_results['Total Grade']:.1f}")
+         print(f"Letter Grade: {mcq_results['Letter Grade']}")
+     print("="*80 + "\n")
+
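+ # Grade every free-text question: preprocess both answers, compute SBERT similarity,
+ # apply advanced_grade, and look up subject-specific advice. Returns per-question
+ # results plus the overall numeric and letter grade and the first non-empty parent
+ # and teacher advice found.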
+ def grade_answers(ref_dict, stud_dict, advice_list, threshold=0.8, max_grade=100):
+     results, total, p_adv, t_adv = [], 0, "", ""
+     for q in sorted(ref_dict):
+         entry = ref_dict[q]
+         sim = compute_similarity(
+             preprocess_text(entry['answer']),
+             preprocess_text(stud_dict.get(q, ''))
+         )
+         grade, mark = advanced_grade(entry['answer'], stud_dict.get(q, ''), sim, threshold, max_grade)
+         total += grade
+         adv = get_advice(grade, classify_subject(entry['question']), advice_list)
+         if not p_adv and adv['advice_parent']:
+             p_adv = adv['advice_parent']
+         if not t_adv and adv['advice_teacher']:
+             t_adv = adv['advice_teacher']
+         results.append(OrderedDict([
+             ("Question Number", q),
+             ("Question", entry['question']),
+             ("Subject", classify_subject(entry['question'])),
+             ("Reference", entry['answer']),
+             ("Student", stud_dict.get(q, 'No answer provided')),
+             ("Similarity", sim),
+             ("Grade", grade),
+             ("Mark", mark),
+             ("Advice for Parents", adv['advice_parent']),
+             ("Advice for Teachers", adv['advice_teacher']),
+             ("Study Plan", adv['study_plan']),
+             ("Recommended Books", adv['recommended_books'])
+         ]))
+     overall = total / len(ref_dict) if ref_dict else 0
+     display_results_in_terminal(results)
+     return results, overall, numeric_to_letter_grade(overall), \
+         (p_adv or "Encourage your child to review areas where they struggled."), \
+         (t_adv or "Consider focusing additional instruction on areas where the student showed weakness.")
+
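+ # Detect filled bubbles on an MCQ answer sheet. Assumes a fixed template:
+ # 50 px page margin, 60 px header, one row per question every 60 px, and four
+ # option circles (A-D) spaced 100 px apart. A circle whose 30x30 region is mostly
+ # dark in the thresholded image counts as the selected answer.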
+ def extract_mcq_answers_from_image(image, num_questions=None):
+     margin, vgap, header = 50, 60, 60
+     if num_questions is None:
+         num_questions = (image.shape[0] - 2*margin - header) // vgap
+     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if len(image.shape) == 3 else image
+     _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
+     answers = {}
+     for i in range(1, num_questions + 1):
+         y = margin + header + (i - 1) * vgap
+         for idx, opt in enumerate(["A", "B", "C", "D"]):
+             x = margin + 50 + idx * 100
+             r = 15
+             reg = thresh[y-r:y+r, x-r:x+r]
+             if reg.size and np.mean(reg) < 150:
+                 answers[i] = opt
+                 break
+     return answers
+
+ def numeric_to_letter_grade(grade):
+     if grade >= 90: return "A+"
+     if grade >= 85: return "A"
+     if grade >= 80: return "A-"
+     if grade >= 75: return "B+"
+     if grade >= 70: return "B"
+     if grade >= 65: return "B-"
+     if grade >= 60: return "C+"
+     if grade >= 50: return "C"
+     if grade >= 40: return "D+"
+     if grade >= 30: return "D"
+     return "F"
+
+ def grade_mcq_answers(correct_dict, student_dict, points_per_question=1):
+     correct, incorrect = [], []
+     score = 0
+     for q in sorted(correct_dict):
+         if student_dict.get(q) == correct_dict[q]:
+             correct.append(q)
+             score += points_per_question
+         else:
+             incorrect.append(q)
+     total = (score / (len(correct_dict) * points_per_question)) * 100 if correct_dict else 0
+     return {"Correct Questions": correct,
+             "Incorrect Questions": incorrect,
+             "Total Grade": total,
+             "Letter Grade": numeric_to_letter_grade(total)}
+
+ def generate_random_id():
+     return random.randint(10000, 99999)
+
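+ # POST /grade_exam
+ # Expects multipart form data with two images ('ref_image', 'stud_image') and the
+ # form fields examId, StudentIDg, parentId, teacherId. If enough MCQ bubbles are
+ # detected on both sheets, the exam is graded as MCQ; otherwise both sheets are
+ # OCR'd and graded as free-text answers. The resulting grade and advice are posted
+ # to the external grades/advices APIs and a simple status JSON is returned.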
+ @app.route('/grade_exam', methods=['POST'])
+ def grade_exam():
+     if 'ref_image' not in request.files or 'stud_image' not in request.files:
+         return Response(json.dumps({"Error": "Missing one or both image files."}), status=400, mimetype='application/json')
+     ref_file = request.files['ref_image']
+     stud_file = request.files['stud_image']
+     ref_bytes = np.frombuffer(ref_file.read(), np.uint8)
+     stud_bytes = np.frombuffer(stud_file.read(), np.uint8)
+     ref_img = cv2.imdecode(ref_bytes, cv2.IMREAD_COLOR)
+     stud_img = cv2.imdecode(stud_bytes, cv2.IMREAD_COLOR)
+     if ref_img is None or stud_img is None:
+         return Response(json.dumps({"Error": "One or both images could not be processed."}), status=400, mimetype='application/json')
+     margin, vgap, header = 50, 60, 60
+     computed_questions = (ref_img.shape[0] - 2*margin - header) // vgap
+     mcq_ref = extract_mcq_answers_from_image(ref_img, num_questions=computed_questions)
+     mcq_stud = extract_mcq_answers_from_image(stud_img, num_questions=computed_questions)
+     if len(mcq_ref) >= computed_questions // 2 and len(mcq_stud) >= computed_questions // 2:
+         mcq_result = grade_mcq_answers(mcq_ref, mcq_stud)
+         total_grade = mcq_result["Total Grade"]
+         letter_grade = mcq_result["Letter Grade"]
+         parent_advice = "Review incorrect answers with your child and focus on identified knowledge gaps."
+         teacher_advice = "Consider revisiting topics with high error rates in upcoming lessons."
+         display_results_in_terminal(None, mcq_result)
+     else:
+         advice_file = 'data/advice.csv'
+         ref_text = ocr_from_array(ref_img)
+         stud_text = ocr_from_array(stud_img)
+         ref_answers = parse_reference_answers(ref_text)
+         stud_answers = parse_student_answers(stud_text)
+         print_parsed_answers(ref_answers, stud_answers)
+         advice_list = load_advice(advice_file)
+         results, total_grade, letter_grade, parent_advice, teacher_advice = grade_answers(
+             ref_answers, stud_answers, advice_list, threshold=0.8, max_grade=100
+         )
+     exam_id = request.form.get("examId")
+     student_idg = request.form.get("StudentIDg")
+     parent_id = request.form.get("parentId")
+     teacher_id = request.form.get("teacherId")
+     grade_payload = {
+         "id": str(generate_random_id()),
+         "examId": exam_id,
+         "obtainedMarks": str(total_grade),
+         "grade": letter_grade,
+         "StudentIDg": student_idg
+     }
+     advice_payload = {
+         "id": str(generate_random_id()),
+         "parentAdvice": parent_advice,
+         "teacherAdvice": teacher_advice,
+         "parentId": parent_id,
+         "teacherId": teacher_id
+     }
+     try:
+         grade_resp = requests.post("http://54.242.19.19:3000/api/grades/", json=grade_payload)
+         advice_resp = requests.post("http://54.242.19.19:3000/api/advices/create/", json=advice_payload)
+         print("→ Posted grade payload:", json.dumps(grade_payload, indent=2))
+         print("→ Grade API response:", grade_resp.status_code, grade_resp.text)
+         print("→ Posted advice payload:", json.dumps(advice_payload, indent=2))
+         print("→ Advice API response:", advice_resp.status_code, advice_resp.text)
+     except Exception as e:
+         print("Error sending to external APIs:", e)
+     return Response(
+         json.dumps({"status": "ok", "message": "Grade and advice sent to external services."}),
+         status=200, mimetype="application/json"
+     )
+
+ if __name__ == '__main__':
+     port = int(os.environ.get("PORT", 7860))
+     app.run(host="0.0.0.0", port=port, debug=False)
data/advice.csv ADDED
The diff for this file is too large to render.
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ numpy
+ opencv-python-headless
+ spacy
+ flask
+ paddleocr
+ paddlepaddle
+ sentence-transformers
+ transformers