yamanavijayavardhan committed on
Commit
666fb5d
·
1 Parent(s): 43ce63b
Files changed (2) hide show
  1. README.md +45 -2
  2. main.py +128 -44
README.md CHANGED
@@ -6,7 +6,50 @@ colorTo: green
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
- short_description: A application for automated descriptive answer evaluation
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
9
+ app_port: 7860
10
  ---
11
 
12
+ # Answer Grading App
13
+
14
+ This is an AI-powered answer grading application that can grade student answers using multiple similarity metrics and LLM-based scoring.
15
+
16
+ ## Features
17
+
18
+ - Support for PDF and CSV input formats
19
+ - Multiple similarity metrics:
20
+ - TF-IDF scoring
21
+ - Semantic similarity
22
+ - Word-level similarity
23
+ - LLM-based scoring
24
+ - Handwritten text recognition (HTR) support
25
+ - Rate limiting and CORS support (cross-origin requests are allowed from any origin)
26
+ - Comprehensive logging
27
+
28
+ ## Usage
29
+
30
+ 1. Upload your query file (text file with questions)
31
+ 2. Choose input format (PDF or CSV)
32
+ 3. Upload answer files (PDFs or CSV)
33
+ 4. Get graded results
34
+
35
+ ## API Endpoints
36
+
37
+ - `/`: Main interface
38
+ - `/compute_answers`: Process and generate answers
39
+ - `/compute_marks`: Calculate marks for student answers
40
+ - `/check_logs`: View application logs
41
+
42
+ ## Environment Variables
43
+
44
+ - `PORT`: Server port (default: 7860)
45
+ - `HF_HOME`: Hugging Face cache directory (overwritten by the app at startup with a fresh temporary directory)
46
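+ - `GENSIM_DATA_DIR`: Gensim data directory (overwritten by the app at startup with a `gensim-data` folder inside its temporary cache directory)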
47
+
48
+ ## Rate Limits
49
+
50
+ - 200 requests per day
51
+ - 50 requests per hour
52
+
53
+ ## Dependencies
54
+
55
+ See `requirements.txt` for full list of dependencies.
main.py CHANGED
@@ -5,18 +5,13 @@ import logging
5
  import sys
6
  import builtins
7
  from datetime import datetime
 
 
 
 
8
 
9
- # Set up logging to write to stdout
10
- logging.basicConfig(
11
- level=logging.INFO,
12
- format='%(message)s', # Simplified format for better readability
13
- handlers=[
14
- logging.StreamHandler(sys.stdout)
15
- ]
16
- )
17
-
18
- # Create a custom logger
19
- logger = logging.getLogger(__name__)
20
 
21
  # Create a logs directory in the temp folder
22
  log_dir = os.path.join(tempfile.gettempdir(), 'app_logs')
@@ -25,7 +20,7 @@ os.makedirs(log_dir, exist_ok=True)
25
  # Create a log file with timestamp
26
  log_file = os.path.join(log_dir, f'app_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
27
 
28
- # Set up logging to both file and console
29
  logging.basicConfig(
30
  level=logging.INFO,
31
  format='%(asctime)s - %(levelname)s - %(message)s',
@@ -35,6 +30,9 @@ logging.basicConfig(
35
  ]
36
  )
37
 
 
 
 
38
  # Add a print function that also logs
39
  def log_print(message, level="INFO"):
40
  # Use the original print function to avoid recursion
@@ -50,30 +48,33 @@ def log_print(message, level="INFO"):
50
  cache_dir = tempfile.mkdtemp()
51
  nltk_data_dir = os.path.join(cache_dir, 'nltk_data')
52
  gensim_data_dir = os.path.join(cache_dir, 'gensim-data')
53
- upload_dir = os.path.join(cache_dir, 'uploads') # New upload directory in /tmp
54
- ans_image_dir = os.path.join(cache_dir, 'ans_image') # Move ans_image to temp directory
55
 
56
  # Set environment variables
57
  os.environ['HF_HOME'] = cache_dir
58
  os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
59
 
60
  # Create directories with correct permissions
61
- os.makedirs(nltk_data_dir, exist_ok=True)
62
- os.makedirs(gensim_data_dir, exist_ok=True)
63
- os.makedirs(upload_dir, exist_ok=True)
64
- os.makedirs(ans_image_dir, exist_ok=True) # Create ans_image in temp directory
 
 
65
 
66
  # Add the custom directory to NLTK's search path
67
  nltk.data.path.insert(0, nltk_data_dir)
68
 
69
  # Download required NLTK data
70
- try:
71
- log_print("Downloading required NLTK data...")
72
- nltk.download('stopwords', download_dir=nltk_data_dir)
73
- nltk.download('punkt', download_dir=nltk_data_dir)
74
- nltk.download('wordnet', download_dir=nltk_data_dir)
75
- except Exception as e:
76
- log_print(f"Error downloading NLTK data: {e}", "ERROR")
 
77
 
78
  from flask import Flask, request, jsonify, render_template
79
  import json
@@ -91,6 +92,22 @@ app = Flask(__name__)
91
  # Use the new upload directory
92
  UPLOAD_FOLDER = upload_dir
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  @app.route('/')
95
  def index():
96
  return render_template('index.html')
@@ -109,10 +126,21 @@ def compute_answers():
109
  log_print("Missing query file", "ERROR")
110
  return jsonify({"error": "Missing query file"}), 400
111
 
112
- queries = query_file.read().decode('utf-8').splitlines()
113
- log_print(f"Received queries: {queries}")
 
 
 
 
 
 
 
114
 
115
  file_type = request.form.get('file_type')
 
 
 
 
116
  ans_csv_file = request.files.get('ans_csv_file')
117
 
118
  if file_type == "csv":
@@ -120,12 +148,16 @@ def compute_answers():
120
  log_print("Missing answer CSV file", "ERROR")
121
  return jsonify({"error": "Missing answer CSV file"}), 400
122
 
123
- ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
124
- c_answers = []
125
- for i in ans_csv_file:
126
- c_answers.append(i.split('\\n'))
127
- log_print(f"Processed CSV answers: {c_answers}")
128
- return jsonify({"answers": c_answers}), 200
 
 
 
 
129
 
130
  c_answers = []
131
 
@@ -189,18 +221,34 @@ def compute_marks():
189
  log_print("No answers provided", "ERROR")
190
  return jsonify({"error": "No answers provided"}), 400
191
 
192
- log_print("=== Processing Answers ===")
193
- log_print(f"Received answers: {a}")
194
- a = json.loads(a)
195
- answers = []
196
- for i in a:
197
- ans = i.split('\n\n')
198
- answers.append(ans)
199
- log_print(f"Processed answers structure: {answers}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
  # Initialize data structure and parent folder
202
  data = {}
203
- parent_folder = "ans_image" # Changed from cache_dir to match reference code
204
 
205
  # Check if answers exist
206
  if not answers:
@@ -212,7 +260,7 @@ def compute_marks():
212
  student_path = os.path.join(parent_folder, student_folder)
213
  if os.path.isdir(student_path):
214
  for image_file in os.listdir(student_path):
215
- if image_file.endswith(('.jpg')): # Changed to match reference code
216
  full_path = os.path.join(student_path, image_file).replace("\\", "/")
217
  if student_folder in data:
218
  data[student_folder].append(full_path)
@@ -266,6 +314,14 @@ def compute_marks():
266
  for student, marks_list in s_marks.items():
267
  log_print(f"{student}: {marks_list}")
268
 
 
 
 
 
 
 
 
 
269
  return jsonify({"message": s_marks}), 200
270
 
271
  except Exception as e:
@@ -326,5 +382,33 @@ def check_logs():
326
  except Exception as e:
327
  return jsonify({"error": str(e)})
328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
  if __name__ == '__main__':
330
- app.run(host='0.0.0.0', port=7860)
 
 
 
 
 
 
 
5
  import sys
6
  import builtins
7
  from datetime import datetime
8
+ from flask_limiter import Limiter
9
+ from flask_limiter.util import get_remote_address
10
+ from flask_cors import CORS
11
+ from dotenv import load_dotenv
12
 
13
+ # Load environment variables
14
+ load_dotenv()
 
 
 
 
 
 
 
 
 
15
 
16
  # Create a logs directory in the temp folder
17
  log_dir = os.path.join(tempfile.gettempdir(), 'app_logs')
 
20
  # Create a log file with timestamp
21
  log_file = os.path.join(log_dir, f'app_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
22
 
23
+ # Set up logging to both file and console (single configuration)
24
  logging.basicConfig(
25
  level=logging.INFO,
26
  format='%(asctime)s - %(levelname)s - %(message)s',
 
30
  ]
31
  )
32
 
33
+ # Create a custom logger
34
+ logger = logging.getLogger(__name__)
35
+
36
  # Add a print function that also logs
37
  def log_print(message, level="INFO"):
38
  # Use the original print function to avoid recursion
 
48
  cache_dir = tempfile.mkdtemp()
49
  nltk_data_dir = os.path.join(cache_dir, 'nltk_data')
50
  gensim_data_dir = os.path.join(cache_dir, 'gensim-data')
51
+ upload_dir = os.path.join(cache_dir, 'uploads')
52
+ ans_image_dir = os.path.join(cache_dir, 'ans_image')
53
 
54
  # Set environment variables
55
  os.environ['HF_HOME'] = cache_dir
56
  os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
57
 
58
  # Create directories with correct permissions
59
+ for directory in [nltk_data_dir, gensim_data_dir, upload_dir, ans_image_dir]:
60
+ try:
61
+ os.makedirs(directory, exist_ok=True)
62
+ except Exception as e:
63
+ log_print(f"Error creating directory {directory}: {e}", "ERROR")
64
+ raise
65
 
66
  # Add the custom directory to NLTK's search path
67
  nltk.data.path.insert(0, nltk_data_dir)
68
 
69
  # Download required NLTK data
70
+ required_nltk_data = ['stopwords', 'punkt', 'wordnet']
71
+ for data in required_nltk_data:
72
+ try:
73
+ log_print(f"Downloading NLTK data: {data}")
74
+ nltk.download(data, download_dir=nltk_data_dir)
75
+ except Exception as e:
76
+ log_print(f"Error downloading NLTK data {data}: {e}", "ERROR")
77
+ raise
78
 
79
  from flask import Flask, request, jsonify, render_template
80
  import json
 
92
  # Use the new upload directory
93
  UPLOAD_FOLDER = upload_dir
94
 
95
+ # Configure CORS for Hugging Face
96
+ CORS(app, resources={
97
+ r"/*": {
98
+ "origins": ["*"],
99
+ "methods": ["GET", "POST", "OPTIONS"],
100
+ "allow_headers": ["Content-Type", "Authorization"]
101
+ }
102
+ })
103
+
104
+ # Initialize rate limiter
105
+ limiter = Limiter(
106
+ app=app,
107
+ key_func=get_remote_address,
108
+ default_limits=["200 per day", "50 per hour"]
109
+ )
110
+
111
  @app.route('/')
112
  def index():
113
  return render_template('index.html')
 
126
  log_print("Missing query file", "ERROR")
127
  return jsonify({"error": "Missing query file"}), 400
128
 
129
+ try:
130
+ queries = query_file.read().decode('utf-8').splitlines()
131
+ if not queries:
132
+ log_print("No queries found in file", "ERROR")
133
+ return jsonify({"error": "No queries found in file"}), 400
134
+ log_print(f"Received queries: {queries}")
135
+ except UnicodeDecodeError:
136
+ log_print("Invalid file encoding", "ERROR")
137
+ return jsonify({"error": "Invalid file encoding"}), 400
138
 
139
  file_type = request.form.get('file_type')
140
+ if not file_type:
141
+ log_print("Missing file type", "ERROR")
142
+ return jsonify({"error": "Missing file type"}), 400
143
+
144
  ans_csv_file = request.files.get('ans_csv_file')
145
 
146
  if file_type == "csv":
 
148
  log_print("Missing answer CSV file", "ERROR")
149
  return jsonify({"error": "Missing answer CSV file"}), 400
150
 
151
+ try:
152
+ ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
153
+ c_answers = []
154
+ for i in ans_csv_file:
155
+ c_answers.append(i.split('\\n'))
156
+ log_print(f"Processed CSV answers: {c_answers}")
157
+ return jsonify({"answers": c_answers}), 200
158
+ except UnicodeDecodeError:
159
+ log_print("Invalid CSV file encoding", "ERROR")
160
+ return jsonify({"error": "Invalid CSV file encoding"}), 400
161
 
162
  c_answers = []
163
 
 
221
  log_print("No answers provided", "ERROR")
222
  return jsonify({"error": "No answers provided"}), 400
223
 
224
+ try:
225
+ log_print("=== Processing Answers ===")
226
+ log_print(f"Received answers: {a}")
227
+ a = json.loads(a)
228
+ answers = []
229
+ for i in a:
230
+ ans = i.split('\n\n')
231
+ answers.append(ans)
232
+ log_print(f"Processed answers structure: {answers}")
233
+ except json.JSONDecodeError:
234
+ log_print("Invalid JSON format in answers", "ERROR")
235
+ return jsonify({"error": "Invalid JSON format in answers"}), 400
236
+
# Validate the shape of the parsed answers before any grading work starts
def validate_answers(answers):
    """Return True when ``answers`` is a list of answer entries.

    The surrounding request handler builds ``answers`` by splitting every
    submitted string on blank lines, so each entry is a list of strings.
    Checking only for plain strings rejected every non-empty request with
    "Invalid answer format". Plain string entries are still accepted so
    any caller passing a flat list keeps working.

    Args:
        answers: The parsed answers structure to check.

    Returns:
        bool: True if ``answers`` is a list whose entries are strings or
        lists of strings; False otherwise.
    """
    if not isinstance(answers, list):
        return False
    for entry in answers:
        if isinstance(entry, str):
            continue
        if not isinstance(entry, list):
            return False
        if not all(isinstance(part, str) for part in entry):
            return False
    return True
244
+
245
+ if not validate_answers(answers):
246
+ log_print("Invalid answer format", "ERROR")
247
+ return jsonify({"error": "Invalid answer format"}), 400
248
 
249
  # Initialize data structure and parent folder
250
  data = {}
251
+ parent_folder = ans_image_dir # Use the temp directory path defined earlier
252
 
253
  # Check if answers exist
254
  if not answers:
 
260
  student_path = os.path.join(parent_folder, student_folder)
261
  if os.path.isdir(student_path):
262
  for image_file in os.listdir(student_path):
263
+ if image_file.endswith('.jpg'): # Correct syntax for single extension
264
  full_path = os.path.join(student_path, image_file).replace("\\", "/")
265
  if student_folder in data:
266
  data[student_folder].append(full_path)
 
314
  for student, marks_list in s_marks.items():
315
  log_print(f"{student}: {marks_list}")
316
 
317
+ # Add cleanup at the end
318
+ try:
319
+ import shutil
320
+ shutil.rmtree(ans_image_dir)
321
+ os.makedirs(ans_image_dir, exist_ok=True)
322
+ except Exception as e:
323
+ log_print(f"Warning: Could not clean up ans_image directory: {e}", "WARNING")
324
+
325
  return jsonify({"message": s_marks}), 200
326
 
327
  except Exception as e:
 
382
  except Exception as e:
383
  return jsonify({"error": str(e)})
384
 
# File type validation
def is_valid_image_file(filename):
    """Return True if ``filename`` ends in a supported image extension.

    The extension is compared case-insensitively against ``.jpg``,
    ``.jpeg`` and ``.png``.

    Args:
        filename: File name or path to check. ``None`` and the empty
            string are treated as invalid instead of raising.

    Returns:
        bool: True when the extension is one of the supported ones.
    """
    # An upload without a filename must not crash os.path.splitext.
    if not filename:
        return False
    valid_extensions = {'.jpg', '.jpeg', '.png'}
    return os.path.splitext(filename)[1].lower() in valid_extensions
389
+
def allowed_file(filename, allowed_extensions):
    """Return True if ``filename`` has one of ``allowed_extensions``.

    The file's extension is the text after the last dot, compared
    case-insensitively. Allowed extensions may be written with or without
    a leading dot and in any case (``"csv"``, ``".CSV"``), so both common
    conventions work.

    Args:
        filename: File name to check. ``None``, the empty string and names
            without a dot are rejected instead of raising.
        allowed_extensions: Collection of permitted extensions.

    Returns:
        bool: True when the extension is permitted.
    """
    if not filename or '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    # Exact match first: this is the original comparison.
    if extension in allowed_extensions:
        return True
    # Fall back to a normalized comparison so ".jpg" or "JPG" entries match.
    normalized = {str(item).lower().lstrip('.') for item in allowed_extensions}
    normalized.discard('')
    return extension in normalized
393
+
def cleanup_temp_files():
    """Clean up temporary files and directories.

    For each of the answer-image, upload, NLTK-data and gensim-data
    directories that exists, the directory tree is removed and an empty
    directory is recreated in its place. Failures are logged per directory
    and do not stop the remaining directories from being cleaned; nothing
    is raised to the caller.
    """
    import shutil

    all_cleaned = True
    for directory in (ans_image_dir, upload_dir, nltk_data_dir, gensim_data_dir):
        # Handle each directory on its own so one failure (e.g. a file
        # still in use) cannot leave the others uncleaned.
        try:
            if os.path.exists(directory):
                shutil.rmtree(directory)
                os.makedirs(directory, exist_ok=True)
        except Exception as e:  # best-effort cleanup: log and continue
            all_cleaned = False
            log_print(f"Error cleaning up temporary files in {directory}: {e}", "ERROR")

    if all_cleaned:
        log_print("Successfully cleaned up temporary files")
406
+
407
  if __name__ == '__main__':
408
+ try:
409
+ # Get port from environment variable or use default
410
+ port = int(os.environ.get('PORT', 7860))
411
+ # Use 0.0.0.0 for Hugging Face
412
+ app.run(host='0.0.0.0', port=port)
413
+ finally:
414
+ cleanup_temp_files()