Spaces:

yamanavijayavardhan
/

answer-grading-app

Sleeping

App Files Files Community

answer-grading-app / main.py

yamanavijayavardhan

update_

666fb5d 4 months ago

raw

history blame

15.3 kB

	import os
	import tempfile
	import nltk
	import logging
	import sys
	import builtins
	from datetime import datetime
	from flask_limiter import Limiter
	from flask_limiter.util import get_remote_address
	from flask_cors import CORS
	from dotenv import load_dotenv

	# Load environment variables (python-dotenv) before anything reads os.environ.
	load_dotenv()

	# Log files are kept in an "app_logs" folder under the system temp directory.
	log_dir = os.path.join(tempfile.gettempdir(), 'app_logs')
	os.makedirs(log_dir, exist_ok=True)

	# One log file per process start; the start time is embedded in its name.
	_log_started_at = datetime.now().strftime("%Y%m%d_%H%M%S")
	log_file = os.path.join(log_dir, f'app_{_log_started_at}.log')

	# Single logging configuration: every record goes to the log file and to
	# stdout.
	_log_handlers = [
	    logging.FileHandler(log_file),
	    logging.StreamHandler(sys.stdout),
	]
	logging.basicConfig(
	    level=logging.INFO,
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    handlers=_log_handlers,
	)

	# Module-level logger used by log_print().
	logger = logging.getLogger(__name__)

	# Add a print function that also logs
	# Level names accepted by log_print(), mapped to their numeric logging levels.
	_LOG_PRINT_LEVELS = {
	    "DEBUG": logging.DEBUG,
	    "INFO": logging.INFO,
	    "WARNING": logging.WARNING,
	    "WARN": logging.WARNING,
	    "ERROR": logging.ERROR,
	    "CRITICAL": logging.CRITICAL,
	}


	def log_print(message, level="INFO"):
	    """Print *message* to stdout and record it with the module logger.

	    Args:
	        message: Text to print and to log.
	        level: Name of the logging level, matched case-insensitively
	            ("DEBUG", "INFO", "WARNING"/"WARN", "ERROR" or "CRITICAL").
	            An unrecognised name is logged at INFO, so a message that was
	            printed is never silently missing from the log.
	    """
	    # builtins.print is called explicitly so this keeps working (without
	    # recursion) even if the name `print` is rebound to this function.
	    builtins.print(message, flush=True)

	    numeric_level = _LOG_PRINT_LEVELS.get(str(level).upper(), logging.INFO)
	    logger.log(numeric_level, message)

	# All cache and scratch data live under one freshly created temporary
	# directory (tempfile.mkdtemp), so nothing is written next to the source.
	cache_dir = tempfile.mkdtemp()
	nltk_data_dir = os.path.join(cache_dir, 'nltk_data')
	gensim_data_dir = os.path.join(cache_dir, 'gensim-data')
	upload_dir = os.path.join(cache_dir, 'uploads')
	ans_image_dir = os.path.join(cache_dir, 'ans_image')

	# Point third-party caches at the temp tree. These variables are presumably
	# read by the Hugging Face and gensim libraries -- confirm against their docs.
	os.environ['HF_HOME'] = cache_dir
	os.environ['GENSIM_DATA_DIR'] = gensim_data_dir

	# Create the working directories; a failure here is fatal for startup.
	for directory in [nltk_data_dir, gensim_data_dir, upload_dir, ans_image_dir]:
	    try:
	        os.makedirs(directory, exist_ok=True)
	    except Exception as e:
	        log_print(f"Error creating directory {directory}: {e}", "ERROR")
	        raise

	# Make NLTK look in the custom directory before its default locations.
	nltk.data.path.insert(0, nltk_data_dir)

	# Download required NLTK data
	required_nltk_data = ['stopwords', 'punkt', 'wordnet']
	for data in required_nltk_data:
	    try:
	        log_print(f"Downloading NLTK data: {data}")
	        downloaded = nltk.download(data, download_dir=nltk_data_dir)
	    except Exception as e:
	        log_print(f"Error downloading NLTK data {data}: {e}", "ERROR")
	        raise
	    # nltk.download() signals most failures (e.g. no network) by returning
	    # False rather than raising, so the except clause above never sees them.
	    # Record the failure instead of letting it pass unnoticed; startup still
	    # continues, exactly as it did before this check existed.
	    if not downloaded:
	        log_print(f"NLTK data {data} could not be downloaded", "ERROR")

	from flask import Flask, request, jsonify, render_template
	import json
	import torch
	from werkzeug.utils import secure_filename

	from HTR.app import extract_text_from_image
	from correct_answer_generation.answer_generation_database_creation import database_creation, answer_generation
	from similarity_check.tf_idf.tf_idf_score import create_tfidf_values, tfidf_answer_score
	from similarity_check.semantic_meaning_check.semantic import similarity_model_score, fasttext_similarity,question_vector_sentence,question_vector_word
	from similarity_check.llm_based_scoring.llm import llm_score

	app = Flask(__name__)

	# Uploads go to the temporary upload directory created during startup.
	UPLOAD_FOLDER = upload_dir

	# CORS policy for Hugging Face: every route accepts any origin, limited to
	# the listed methods and request headers.
	_cors_resources = {
	    r"/*": {
	        "origins": ["*"],
	        "methods": ["GET", "POST", "OPTIONS"],
	        "allow_headers": ["Content-Type", "Authorization"],
	    },
	}
	CORS(app, resources=_cors_resources)

	# Rate limiter keyed on the client's remote address, with default limits
	# applied to all routes.
	_default_rate_limits = ["200 per day", "50 per hour"]
	limiter = Limiter(
	    app=app,
	    key_func=get_remote_address,
	    default_limits=_default_rate_limits,
	)

	@app.route('/')
	def index():
	    """Serve the landing page rendered from the ``index.html`` template."""
	    landing_page = render_template('index.html')
	    return landing_page

	def new_value(value, old_min, old_max, new_min, new_max):
	    """Linearly map *value* from [old_min, old_max] onto [new_min, new_max].

	    ``old_min`` maps to ``new_min`` and ``old_max`` to ``new_max``; values
	    outside the old range are extrapolated, not clamped. Raises
	    ZeroDivisionError when ``old_max == old_min``.
	    """
	    old_span = old_max - old_min
	    scaled_offset = (value - old_min) * (new_max - new_min)
	    return new_min + scaled_offset / old_span

	def _store_uploaded_pdfs(uploads, pdf_dir):
	    """Save each uploaded ``.pdf`` file into *pdf_dir*; return the saved paths."""
	    saved_paths = []
	    for upload in uploads:
	        # An upload may arrive without a filename; treat that as "not a PDF".
	        original_name = upload.filename or ''
	        # Case-insensitive so "REPORT.PDF" is accepted as well.
	        if not original_name.lower().endswith('.pdf'):
	            continue
	        filepath = os.path.join(pdf_dir, secure_filename(original_name))
	        upload.save(filepath)
	        saved_paths.append(filepath)
	    return saved_paths


	@app.route('/compute_answers', methods=['POST'])
	def compute_answers():
	    """Build the reference answers for an uploaded list of queries.

	    Request (multipart form):
	        query_file: UTF-8 text file, one query per line.
	        file_type: "csv" or "pdf".
	        ans_csv_file: required when file_type is "csv"; UTF-8 text whose
	            lines are returned split into answers.
	        pdf_files[]: required when file_type is "pdf"; the PDFs passed to
	            database_creation() and answer_generation().

	    Returns:
	        ``{"answers": [...]}`` with status 200 on success; ``{"error": ...}``
	        with status 400 for invalid input or 500 for an unexpected failure.
	    """
	    try:
	        log_print("\n=== Starting Answer Computation ===")

	        query_file = request.files.get('query_file')
	        if not query_file:
	            log_print("Missing query file", "ERROR")
	            return jsonify({"error": "Missing query file"}), 400

	        try:
	            queries = query_file.read().decode('utf-8').splitlines()
	        except UnicodeDecodeError:
	            log_print("Invalid file encoding", "ERROR")
	            return jsonify({"error": "Invalid file encoding"}), 400
	        if not queries:
	            log_print("No queries found in file", "ERROR")
	            return jsonify({"error": "No queries found in file"}), 400
	        log_print(f"Received queries: {queries}")

	        file_type = request.form.get('file_type')
	        if not file_type:
	            log_print("Missing file type", "ERROR")
	            return jsonify({"error": "Missing file type"}), 400

	        if file_type == "csv":
	            ans_csv_file = request.files.get('ans_csv_file')
	            if not ans_csv_file:
	                log_print("Missing answer CSV file", "ERROR")
	                return jsonify({"error": "Missing answer CSV file"}), 400

	            try:
	                csv_lines = ans_csv_file.read().decode('utf-8').splitlines()
	            except UnicodeDecodeError:
	                log_print("Invalid CSV file encoding", "ERROR")
	                return jsonify({"error": "Invalid CSV file encoding"}), 400

	            # '\\n' is the literal two-character sequence backslash + n, not
	            # a newline (the lines were already separated by splitlines()).
	            # NOTE(review): confirm this is the delimiter the answer files use.
	            c_answers = [line.split('\\n') for line in csv_lines]
	            log_print(f"Processed CSV answers: {c_answers}")
	            return jsonify({"answers": c_answers}), 200

	        if file_type != 'pdf':
	            log_print(f"Unsupported file type: {file_type}", "ERROR")
	            return jsonify({"error": "Unsupported file type"}), 400

	        import shutil

	        # Scratch directory for the uploaded PDFs.
	        pdf_dir = os.path.join(cache_dir, 'pdf_files')
	        os.makedirs(pdf_dir, exist_ok=True)
	        try:
	            pdf_files = _store_uploaded_pdfs(
	                request.files.getlist('pdf_files[]'), pdf_dir)
	            if not pdf_files:
	                log_print("No PDF files uploaded", "ERROR")
	                return jsonify({"error": "No PDF files uploaded"}), 400

	            log_print(f"Processing {len(pdf_files)} PDF files")

	            # Process PDFs
	            for pdf_file in pdf_files:
	                database_creation(pdf_file)

	            # One entry per query, holding one generated answer per PDF.
	            c_answers = [
	                [answer_generation(pdf_file, query) for pdf_file in pdf_files]
	                for query in queries
	            ]
	        finally:
	            # Runs on every exit path, so the uploads are removed even when
	            # processing raises or no PDF was accepted. Cleanup is best
	            # effort: a failure is logged but never masks the real outcome.
	            try:
	                shutil.rmtree(pdf_dir)
	            except Exception as e:
	                log_print(f"Warning: Could not clean up PDF directory: {e}", "WARNING")

	        log_print(f"Generated answers: {c_answers}")
	        return jsonify({"answers": c_answers}), 200

	    except Exception as e:
	        log_print(f"Error in compute_answers: {str(e)}", "ERROR")
	        return jsonify({"error": str(e)}), 500

	def _natural_sort_key(name):
	    """Sort key that orders embedded numbers numerically ("2.jpg" < "10.jpg")."""
	    import re

	    # Splitting on a capturing group keeps the digit runs: even positions are
	    # text, odd positions are the digit runs themselves.
	    tokens = re.split(r'([0-9]+)', name)
	    key = [int(tok) if idx % 2 else tok.lower() for idx, tok in enumerate(tokens)]
	    # The raw name breaks ties between names that differ only in case.
	    return key, name


	def _collect_student_images(parent_folder):
	    """Map each student sub-folder of *parent_folder* to its ``.jpg`` paths.

	    Folders without any ``.jpg`` file are omitted. Paths use forward slashes.
	    """
	    images_by_student = {}
	    for student_folder in os.listdir(parent_folder):
	        student_path = os.path.join(parent_folder, student_folder)
	        if not os.path.isdir(student_path):
	            continue
	        # os.listdir() returns entries in arbitrary order, but the position of
	        # an image decides which reference answer it is graded against, so
	        # the order is made deterministic here.
	        # NOTE(review): assumes image file names sort in question order.
	        image_paths = [
	            os.path.join(student_path, image_file).replace("\\", "/")
	            for image_file in sorted(os.listdir(student_path), key=_natural_sort_key)
	            if image_file.endswith('.jpg')
	        ]
	        if image_paths:
	            images_by_student[student_folder] = image_paths
	    return images_by_student


	@app.route('/compute_marks', methods=['POST'])
	def compute_marks():
	    """Grade every student's answer images against the reference answers.

	    Request (form):
	        answers: JSON array of strings, one per question. Each string holds
	            that question's reference answers separated by a blank line
	            ("\\n\\n").

	    The images are read from the per-student sub-folders of ``ans_image_dir``;
	    the n-th image of a student is graded against the n-th reference answer.
	    The image directory is emptied after grading.

	    Returns:
	        ``{"message": {student: [marks, ...]}}`` with status 200 on success;
	        ``{"error": ...}`` with status 400 for invalid input or 500 for an
	        unexpected failure.
	    """
	    try:
	        log_print("\n=== Starting Marks Computation ===")

	        # Get and process answers
	        raw_answers = request.form.get('answers')
	        if not raw_answers:
	            log_print("No answers provided", "ERROR")
	            return jsonify({"error": "No answers provided"}), 400

	        try:
	            log_print("=== Processing Answers ===")
	            log_print(f"Received answers: {raw_answers}")
	            parsed_answers = json.loads(raw_answers)
	        except json.JSONDecodeError:
	            log_print("Invalid JSON format in answers", "ERROR")
	            return jsonify({"error": "Invalid JSON format in answers"}), 400

	        # Validate the decoded payload BEFORE splitting it. Checking after the
	        # split (where every element has already become a list) rejects every
	        # non-empty request.
	        if not isinstance(parsed_answers, list) or not all(
	                isinstance(item, str) for item in parsed_answers):
	            log_print("Invalid answer format", "ERROR")
	            return jsonify({"error": "Invalid answer format"}), 400

	        # Each question may carry several reference answers, blank-line separated.
	        answers = [item.split('\n\n') for item in parsed_answers]
	        log_print(f"Processed answers structure: {answers}")

	        # Check if answers exist
	        if not answers:
	            log_print("No answers found", "ERROR")
	            return jsonify({"error": "Missing required files"}), 400

	        # Process student folders and images
	        parent_folder = ans_image_dir  # temp directory created at startup
	        data = _collect_student_images(parent_folder)

	        # Pre-compute sentence and word vectors for every reference answer.
	        sen_vec_answers = []
	        word_vec_answers = []
	        for reference_group in answers:
	            sentence_vectors = []
	            word_vectors = []
	            for reference in reference_group:
	                sentence_vectors.append(question_vector_sentence(reference))
	                word_vectors.append(question_vector_word(reference))
	            sen_vec_answers.append(sentence_vectors)
	            word_vec_answers.append(word_vectors)

	        # Calculate marks
	        s_marks = {}
	        for student_folder, image_paths in data.items():
	            student_marks = s_marks[student_folder] = []
	            # enumerate() ties the question index to the image position, so
	            # an image that fails to process cannot shift the images after
	            # it onto the wrong reference answer.
	            for index, image_path in enumerate(image_paths):
	                try:
	                    s_answer = extract_text_from_image(image_path)
	                    log_print(f"\nProcessing {student_folder}/{os.path.basename(image_path)}:")
	                    log_print(f"Extracted answer: {s_answer}")

	                    if s_answer and index < len(answers):
	                        reference_group = answers[index]
	                        log_print(f"Reference answer: {reference_group}")
	                        tf_idf_word_values, max_tfidf = create_tfidf_values(reference_group)
	                        m = marks(s_answer, sen_vec_answers[index], word_vec_answers[index],
	                                  tf_idf_word_values, max_tfidf, reference_group)

	                        # Similarity models may hand back a 0-d tensor.
	                        if isinstance(m, torch.Tensor):
	                            m = m.item()
	                        student_marks.append(round(float(m), 2))
	                        log_print(f"Marks awarded: {m}")
	                    else:
	                        student_marks.append(0)
	                        log_print(f"No text extracted or no reference answer for index {index}", "WARNING")

	                except Exception as e:
	                    # One unreadable image must not abort the whole batch.
	                    log_print(f"Error processing {image_path}: {str(e)}", "ERROR")
	                    student_marks.append(0)

	        log_print("\nFinal Results:")
	        for student, marks_list in s_marks.items():
	            log_print(f"{student}: {marks_list}")

	        # Empty the image directory so the next run starts clean (best effort).
	        try:
	            import shutil
	            shutil.rmtree(ans_image_dir)
	            os.makedirs(ans_image_dir, exist_ok=True)
	        except Exception as e:
	            log_print(f"Warning: Could not clean up ans_image directory: {e}", "WARNING")

	        return jsonify({"message": s_marks}), 200

	    except Exception as e:
	        log_print(f"Error in compute_marks: {str(e)}", "ERROR")
	        return jsonify({"error": str(e)}), 500

	def marks(answer, sen_vec_answers, word_vec_answers, tf_idf_word_values, max_tfidf, correct_answers):
	    """Score a student's *answer* against the reference answers.

	    The score combines four signals:
	      * TF-IDF score (requested with ``marks=10``): values above 3 are
	        rescaled from 3..10 to 0..5.
	      * Sentence similarity: above 0.95 adds 3, 0.5..0.95 is rescaled to 0..3.
	      * Word similarity: above 0.9 adds 2, 0.4..0.9 is rescaled to 0..2.
	      * LLM score: the highest value returned by ``llm_score``.
	    The result is the mean of the combined similarity marks and the best LLM
	    score. If the TF-IDF score is 2 or lower, nothing else is evaluated and 0
	    is returned.

	    Args:
	        answer: Text extracted from the student's answer.
	        sen_vec_answers: Sentence vectors of the reference answers.
	        word_vec_answers: Word vectors of the reference answers.
	        tf_idf_word_values: First value returned by ``create_tfidf_values``.
	        max_tfidf: Second value returned by ``create_tfidf_values``.
	        correct_answers: Reference answer texts passed to ``llm_score``.

	    Returns:
	        The final mark as a number (may be a tensor if a similarity helper
	        returns one -- the caller handles that case).
	    """
	    total = 0
	    log_print("\n=== Marks Calculation ===")
	    log_print(f"Processing answer: {answer[:100]}...")

	    tfidf_score = tfidf_answer_score(answer, tf_idf_word_values, max_tfidf, marks=10)
	    log_print(f"TFIDF Score: {tfidf_score}")

	    if tfidf_score > 3:
	        total += new_value(tfidf_score, old_min=3, old_max=10, new_min=0, new_max=5)
	        log_print(f"After TFIDF adjustment: {total}")

	    # Too little lexical overlap: skip the expensive similarity and LLM checks.
	    if not tfidf_score > 2:
	        log_print("WARNING: TFIDF score too low, returning 0", "WARNING")
	        return total

	    sentence_similarity = similarity_model_score(sen_vec_answers, answer)
	    log_print(f"Sentence Similarity Score: {sentence_similarity}")
	    if sentence_similarity > 0.95:
	        total += 3
	    elif sentence_similarity > 0.5:
	        total += new_value(sentence_similarity, old_min=0.5, old_max=0.95, new_min=0, new_max=3)

	    word_similarity = fasttext_similarity(word_vec_answers, answer)
	    log_print(f"Word Similarity Score: {word_similarity}")
	    if word_similarity > 0.9:
	        total += 2
	    elif word_similarity > 0.4:
	        total += new_value(word_similarity, old_min=0.4, old_max=0.9, new_min=0, new_max=2)

	    raw_llm_scores = llm_score(correct_answers, answer)
	    log_print(f"LLM Scores: {raw_llm_scores}")
	    llm_scores = [float(score) for score in raw_llm_scores]

	    if llm_scores:
	        best_llm_score = max(llm_scores)
	    else:
	        # max() of an empty sequence raises; without this guard a missing LLM
	        # result would discard the similarity marks earned above.
	        log_print("No LLM scores returned, using 0 for the LLM component", "WARNING")
	        best_llm_score = 0.0
	    log_print(f"Max LLM Score: {best_llm_score}")

	    # Equal weighting of the similarity-based marks and the LLM judgement.
	    total = total / 2 + best_llm_score / 2
	    log_print(f"Final marks: {total}")
	    return total

	@app.route('/check_logs')
	def check_logs():
	    """Return the contents of the current log file as JSON.

	    Returns:
	        ``{"logs": <file contents>}`` on success, or ``{"error": <message>}``
	        with status 500 when the log file cannot be read.

	    NOTE(review): this route has no access control and the log contains
	    request data (received queries, extracted answers). Consider restricting
	    or removing it outside of debugging.
	    """
	    try:
	        # The file is read with the platform default encoding, matching the
	        # FileHandler that writes it. errors='replace' keeps one undecodable
	        # byte from making the whole log unreadable.
	        with open(log_file, 'r', errors='replace') as f:
	            logs = f.read()
	    except Exception as e:
	        # Report the failure with an error status, as the other routes do.
	        return jsonify({"error": str(e)}), 500
	    return jsonify({"logs": logs})

	# Add file type validation
	def is_valid_image_file(filename):
	    """Return True when *filename* ends in .jpg, .jpeg or .png (any letter case)."""
	    _, extension = os.path.splitext(filename)
	    return extension.lower() in ('.jpg', '.jpeg', '.png')

	def allowed_file(filename, allowed_extensions):
	    """Return True when the text after the last dot of *filename*, lowercased,
	    is a member of *allowed_extensions*; False when there is no dot at all.
	    """
	    _stem, dot, extension = filename.rpartition('.')
	    if not dot:
	        return False
	    return extension.lower() in allowed_extensions

	def cleanup_temp_files():
	    """Empty the application's temporary directories.

	    Each of the answer-image, upload, NLTK-data and gensim-data directories
	    that exists is deleted and recreated empty. Directories that do not exist
	    are left alone. Failures are logged per directory and never raised, so a
	    problem with one directory does not stop the others being cleaned.
	    """
	    import shutil

	    all_cleaned = True
	    for directory in (ans_image_dir, upload_dir, nltk_data_dir, gensim_data_dir):
	        try:
	            if os.path.exists(directory):
	                shutil.rmtree(directory)
	                os.makedirs(directory, exist_ok=True)
	        except Exception as e:
	            all_cleaned = False
	            log_print(f"Error cleaning up temporary files in {directory}: {e}", "ERROR")

	    if all_cleaned:
	        log_print("Successfully cleaned up temporary files")

	if __name__ == '__main__':
	    try:
	        # The port is taken from the PORT environment variable, with 7860 as
	        # the fallback when it is not set.
	        listen_port = int(os.getenv('PORT', 7860))
	        # Listen on every interface (0.0.0.0), as needed on Hugging Face.
	        app.run(host='0.0.0.0', port=listen_port)
	    finally:
	        # Always tidy the temporary directories, however the server stops.
	        cleanup_temp_files()