Commit 666fb5d
Parent(s): 43ce63b
update_
README.md CHANGED

@@ -6,7 +6,50 @@ colorTo: green
 sdk: docker
 pinned: false
 license: apache-2.0
-
+app_port: 7860
 ---
 
-
+# Answer Grading App
+
+This is an AI-powered answer grading application that can grade student answers using multiple similarity metrics and LLM-based scoring.
+
+## Features
+
+- Support for PDF and CSV input formats
+- Multiple similarity metrics:
+  - TF-IDF scoring
+  - Semantic similarity
+  - Word-level similarity
+- LLM-based scoring
+- Handwritten text recognition (HTR) support
+- Rate limiting and CORS protection
+- Comprehensive logging
+
+## Usage
+
+1. Upload your query file (text file with questions)
+2. Choose input format (PDF or CSV)
+3. Upload answer files (PDFs or CSV)
+4. Get graded results
+
+## API Endpoints
+
+- `/`: Main interface
+- `/compute_answers`: Process and generate answers
+- `/compute_marks`: Calculate marks for student answers
+- `/check_logs`: View application logs
+
+## Environment Variables
+
+- `PORT`: Server port (default: 7860)
+- `HF_HOME`: Hugging Face cache directory
+- `GENSIM_DATA_DIR`: Gensim data directory
+
+## Rate Limits
+
+- 200 requests per day
+- 50 requests per hour
+
+## Dependencies
+
+See `requirements.txt` for full list of dependencies.
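The handlers behind these endpoints (shown in the main.py diff below) read a `file_type` form field and an `ans_csv_file` upload; the form field name for the query file is not visible in the diff, so it is assumed to be `query_file` here. A minimal client sketch for the CSV path, assuming the Space is reachable locally on port 7860:

```python
# Hypothetical client for /compute_answers in CSV mode.
# Assumptions: the base URL and the "query_file" field name; "file_type" and
# "ans_csv_file" match the names used in the main.py handler below.
import requests

BASE_URL = "http://localhost:7860"  # assumed address of the running Space

with open("queries.txt", "rb") as qf, open("answers.csv", "rb") as af:
    resp = requests.post(
        f"{BASE_URL}/compute_answers",
        files={"query_file": qf, "ans_csv_file": af},
        data={"file_type": "csv"},  # the handler branches on "csv" vs. PDF input
    )

print(resp.status_code)
print(resp.json())  # {"answers": [...]} on success, {"error": "..."} on failure
```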
main.py CHANGED

@@ -5,18 +5,13 @@ import logging
 import sys
 import builtins
 from datetime import datetime
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address
+from flask_cors import CORS
+from dotenv import load_dotenv
 
-#
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(message)s',  # Simplified format for better readability
-    handlers=[
-        logging.StreamHandler(sys.stdout)
-    ]
-)
-
-# Create a custom logger
-logger = logging.getLogger(__name__)
+# Load environment variables
+load_dotenv()
 
 # Create a logs directory in the temp folder
 log_dir = os.path.join(tempfile.gettempdir(), 'app_logs')

@@ -25,7 +20,7 @@ os.makedirs(log_dir, exist_ok=True)
 # Create a log file with timestamp
 log_file = os.path.join(log_dir, f'app_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
 
-# Set up logging to both file and console
+# Set up logging to both file and console (single configuration)
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',

@@ -35,6 +30,9 @@ logging.basicConfig(
     ]
 )
 
+# Create a custom logger
+logger = logging.getLogger(__name__)
+
 # Add a print function that also logs
 def log_print(message, level="INFO"):
     # Use the original print function to avoid recursion

@@ -50,30 +48,33 @@ def log_print(message, level="INFO"):
 cache_dir = tempfile.mkdtemp()
 nltk_data_dir = os.path.join(cache_dir, 'nltk_data')
 gensim_data_dir = os.path.join(cache_dir, 'gensim-data')
-upload_dir = os.path.join(cache_dir, 'uploads')
-ans_image_dir = os.path.join(cache_dir, 'ans_image')
+upload_dir = os.path.join(cache_dir, 'uploads')
+ans_image_dir = os.path.join(cache_dir, 'ans_image')
 
 # Set environment variables
 os.environ['HF_HOME'] = cache_dir
 os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
 
 # Create directories with correct permissions
-
-
-os.makedirs(
-
+for directory in [nltk_data_dir, gensim_data_dir, upload_dir, ans_image_dir]:
+    try:
+        os.makedirs(directory, exist_ok=True)
+    except Exception as e:
+        log_print(f"Error creating directory {directory}: {e}", "ERROR")
+        raise
 
 # Add the custom directory to NLTK's search path
 nltk.data.path.insert(0, nltk_data_dir)
 
 # Download required NLTK data
-
-
-
-
-
-except Exception as e:
-
+required_nltk_data = ['stopwords', 'punkt', 'wordnet']
+for data in required_nltk_data:
+    try:
+        log_print(f"Downloading NLTK data: {data}")
+        nltk.download(data, download_dir=nltk_data_dir)
+    except Exception as e:
+        log_print(f"Error downloading NLTK data {data}: {e}", "ERROR")
+        raise
 
 from flask import Flask, request, jsonify, render_template
 import json

@@ -91,6 +92,22 @@ app = Flask(__name__)
 # Use the new upload directory
 UPLOAD_FOLDER = upload_dir
 
+# Configure CORS for Hugging Face
+CORS(app, resources={
+    r"/*": {
+        "origins": ["*"],
+        "methods": ["GET", "POST", "OPTIONS"],
+        "allow_headers": ["Content-Type", "Authorization"]
+    }
+})
+
+# Initialize rate limiter
+limiter = Limiter(
+    app=app,
+    key_func=get_remote_address,
+    default_limits=["200 per day", "50 per hour"]
+)
+
 @app.route('/')
 def index():
     return render_template('index.html')

@@ -109,10 +126,21 @@ def compute_answers():
         log_print("Missing query file", "ERROR")
         return jsonify({"error": "Missing query file"}), 400
 
-
-
+    try:
+        queries = query_file.read().decode('utf-8').splitlines()
+        if not queries:
+            log_print("No queries found in file", "ERROR")
+            return jsonify({"error": "No queries found in file"}), 400
+        log_print(f"Received queries: {queries}")
+    except UnicodeDecodeError:
+        log_print("Invalid file encoding", "ERROR")
+        return jsonify({"error": "Invalid file encoding"}), 400
 
     file_type = request.form.get('file_type')
+    if not file_type:
+        log_print("Missing file type", "ERROR")
+        return jsonify({"error": "Missing file type"}), 400
+
     ans_csv_file = request.files.get('ans_csv_file')
 
     if file_type == "csv":

@@ -120,12 +148,16 @@ def compute_answers():
             log_print("Missing answer CSV file", "ERROR")
             return jsonify({"error": "Missing answer CSV file"}), 400
 
-
-
-
-
-
-
+        try:
+            ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
+            c_answers = []
+            for i in ans_csv_file:
+                c_answers.append(i.split('\\n'))
+            log_print(f"Processed CSV answers: {c_answers}")
+            return jsonify({"answers": c_answers}), 200
+        except UnicodeDecodeError:
+            log_print("Invalid CSV file encoding", "ERROR")
+            return jsonify({"error": "Invalid CSV file encoding"}), 400
 
     c_answers = []
 

@@ -189,18 +221,34 @@ def compute_marks():
         log_print("No answers provided", "ERROR")
         return jsonify({"error": "No answers provided"}), 400
 
-
-
-
-
-
-
-
-
+    try:
+        log_print("=== Processing Answers ===")
+        log_print(f"Received answers: {a}")
+        a = json.loads(a)
+        answers = []
+        for i in a:
+            ans = i.split('\n\n')
+            answers.append(ans)
+        log_print(f"Processed answers structure: {answers}")
+    except json.JSONDecodeError:
+        log_print("Invalid JSON format in answers", "ERROR")
+        return jsonify({"error": "Invalid JSON format in answers"}), 400
+
+    # Add validation for answers
+    def validate_answers(answers):
+        if not isinstance(answers, list):
+            return False
+        if not all(isinstance(ans, str) for ans in answers):
+            return False
+        return True
+
+    if not validate_answers(answers):
+        log_print("Invalid answer format", "ERROR")
+        return jsonify({"error": "Invalid answer format"}), 400
 
     # Initialize data structure and parent folder
     data = {}
-    parent_folder =
+    parent_folder = ans_image_dir  # Use the temp directory path defined earlier
 
     # Check if answers exist
     if not answers:

@@ -212,7 +260,7 @@ def compute_marks():
         student_path = os.path.join(parent_folder, student_folder)
         if os.path.isdir(student_path):
             for image_file in os.listdir(student_path):
-                if image_file.endswith(
+                if image_file.endswith('.jpg'):  # Correct syntax for single extension
                     full_path = os.path.join(student_path, image_file).replace("\\", "/")
                     if student_folder in data:
                         data[student_folder].append(full_path)

@@ -266,6 +314,14 @@ def compute_marks():
         for student, marks_list in s_marks.items():
            log_print(f"{student}: {marks_list}")
 
+        # Add cleanup at the end
+        try:
+            import shutil
+            shutil.rmtree(ans_image_dir)
+            os.makedirs(ans_image_dir, exist_ok=True)
+        except Exception as e:
+            log_print(f"Warning: Could not clean up ans_image directory: {e}", "WARNING")
+
         return jsonify({"message": s_marks}), 200
 
     except Exception as e:

@@ -326,5 +382,33 @@ def check_logs():
     except Exception as e:
         return jsonify({"error": str(e)})
 
+# Add file type validation
+def is_valid_image_file(filename):
+    valid_extensions = {'.jpg', '.jpeg', '.png'}
+    return os.path.splitext(filename)[1].lower() in valid_extensions
+
+def allowed_file(filename, allowed_extensions):
+    return '.' in filename and \
+           filename.rsplit('.', 1)[1].lower() in allowed_extensions
+
+def cleanup_temp_files():
+    """Clean up temporary files and directories"""
+    try:
+        import shutil
+        temp_dirs = [ans_image_dir, upload_dir, nltk_data_dir, gensim_data_dir]
+        for directory in temp_dirs:
+            if os.path.exists(directory):
+                shutil.rmtree(directory)
+                os.makedirs(directory, exist_ok=True)
+        log_print("Successfully cleaned up temporary files")
+    except Exception as e:
+        log_print(f"Error cleaning up temporary files: {e}", "ERROR")
+
 if __name__ == '__main__':
-
+    try:
+        # Get port from environment variable or use default
+        port = int(os.environ.get('PORT', 7860))
+        # Use 0.0.0.0 for Hugging Face
+        app.run(host='0.0.0.0', port=port)
+    finally:
+        cleanup_temp_files()
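The `compute_marks` hunk above parses the submitted answers with `json.loads` and then splits each element on blank lines, so the expected payload is a JSON-encoded list of strings with individual answers separated by `'\n\n'`. A standalone sketch of that round trip (no server needed; the example strings are illustrative):

```python
# Round trip of the answers payload implied by the compute_marks hunk:
# the handler calls json.loads(...) and splits each element on '\n\n'.
import json

answer_sets = [
    "Answer to question 1\n\nAnswer to question 2",
    "Answer to question 1\n\nAnswer to question 2\n\nAnswer to question 3",
]

payload = json.dumps(answer_sets)                        # what a client would send
parsed = [s.split("\n\n") for s in json.loads(payload)]  # what the handler builds

print(parsed)
# [['Answer to question 1', 'Answer to question 2'],
#  ['Answer to question 1', 'Answer to question 2', 'Answer to question 3']]
```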