|
import os |
|
import tempfile |
|
import nltk |
|
import logging |
|
import sys |
|
import builtins |
|
from datetime import datetime |
|
from flask_cors import CORS |
|
from flask import Flask, request, jsonify, render_template, make_response |
|
from werkzeug.utils import secure_filename |
|
from dotenv import load_dotenv |
|
import json |
|
|
|
|
|
# Load settings from a local .env file (if any) into the process environment
# before anything below reads os.environ.
load_dotenv()

# Root of every writable location used by the application.
BASE_DIR = '/tmp'

# Log files and the model/PDF cache live in sibling folders under BASE_DIR.
log_dir, cache_dir = (
    os.path.join(BASE_DIR, leaf) for leaf in ('app_logs', 'app_cache')
)
|
|
|
def ensure_directory(path):
    """Create ``path`` (including parents) and open its permissions to 0o777.

    Best effort: any failure is reported on stdout as a warning and then
    ignored, so the function always returns ``None`` without raising.
    """
    open_mode = 0o777
    try:
        os.makedirs(path, mode=open_mode, exist_ok=True)
        # makedirs' mode is filtered by the umask, so set it explicitly too.
        os.chmod(path, open_mode)
    except Exception as exc:
        print(f"Warning: Could not set permissions for {path}: {exc}")
|
|
|
|
|
# The log and cache folders must exist before a log file can be opened.
for _required_dir in (log_dir, cache_dir):
    ensure_directory(_required_dir)

# One log file per process start, named after the start time.
_log_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
log_file = os.path.join(log_dir, f'app_{_log_stamp}.log')
try:
    # Create (or truncate) the file up front and make it read/writable by all.
    open(log_file, 'w').close()
    os.chmod(log_file, 0o666)
except Exception as log_err:
    print(f"Warning: Could not create log file: {log_err}")
    # Fall back to a fixed file directly under BASE_DIR.
    log_file = os.path.join(BASE_DIR, 'app.log')
    open(log_file, 'w').close()
    os.chmod(log_file, 0o666)

_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
try:
    # Preferred setup: mirror every record to the log file and to stdout.
    logging.basicConfig(
        level=logging.INFO,
        format=_LOG_FORMAT,
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler(sys.stdout),
        ],
    )
except Exception as setup_err:
    print(f"Warning: Could not set up file logging: {setup_err}")
    # Degrade to stdout-only logging when the file handler cannot be built.
    logging.basicConfig(
        level=logging.INFO,
        format=_LOG_FORMAT,
        handlers=[logging.StreamHandler(sys.stdout)],
    )

# Module-level logger used by log_print() and the app setup code.
logger = logging.getLogger(__name__)
|
|
|
|
|
def log_print(message, level="INFO"):
    """Print ``message`` to stdout immediately and mirror it to the logger.

    ``level`` selects the logger method: "INFO", "ERROR" or "WARNING".
    Any other value is printed but not forwarded to the logger.
    """
    builtins.print(message, flush=True)

    # First matching level wins; unknown levels fall through silently.
    for level_name, emit in (
        ("INFO", logger.info),
        ("ERROR", logger.error),
        ("WARNING", logger.warning),
    ):
        if level == level_name:
            emit(message)
            break
|
|
|
|
|
# Working directories for downloaded corpora/models and for uploaded files.
# The paths are bound unconditionally so that every later reference
# (environment variables below, cleanup_temp_files, the __main__ guard) sees
# them. ensure_directory() reports and swallows its own failures, so no
# extra try/except is needed around the loop.
nltk_data_dir = os.path.join(BASE_DIR, 'nltk_data')
gensim_data_dir = os.path.join(BASE_DIR, 'gensim-data')
upload_dir = os.path.join(BASE_DIR, 'uploads')
ans_image_dir = os.path.join(BASE_DIR, 'ans_image')
images_dir = os.path.join(BASE_DIR, 'images')

for directory in (nltk_data_dir, gensim_data_dir, upload_dir, ans_image_dir, images_dir):
    ensure_directory(directory)

# Point the Hugging Face and gensim caches at the writable directories.
os.environ['HF_HOME'] = cache_dir
os.environ['GENSIM_DATA_DIR'] = gensim_data_dir

# Make NLTK look in our download directory before its default locations.
nltk.data.path.insert(0, nltk_data_dir)

# Corpora/tokenizers fetched at start-up.
required_nltk_data = ['stopwords', 'punkt', 'wordnet']
for data in required_nltk_data:
    try:
        log_print(f"Downloading NLTK data: {data}")
        downloaded = nltk.download(data, download_dir=nltk_data_dir)
    except Exception as e:
        log_print(f"Error downloading NLTK data {data}: {e}", "ERROR")
        raise
    # nltk.download() signals most failures (e.g. no network) by returning
    # False rather than raising; surface that in the log instead of staying
    # silent. Start-up continues, exactly as it did before this check.
    if not downloaded:
        log_print(f"NLTK data {data} could not be downloaded", "ERROR")
|
|
|
from flask import Flask, request, jsonify, render_template |
|
import json |
|
import torch |
|
from werkzeug.utils import secure_filename |
|
|
|
from HTR.app import extract_text_from_image |
|
from correct_answer_generation.answer_generation_database_creation import database_creation, answer_generation |
|
from similarity_check.tf_idf.tf_idf_score import create_tfidf_values, tfidf_answer_score |
|
from similarity_check.semantic_meaning_check.semantic import question_vector_sentence, question_vector_word, fasttext_similarity |
|
from similarity_check.llm_based_scoring.llm import llm_score |
|
|
|
# Flask application object and its JSON / upload-size settings.
app = Flask(__name__)
app.config.update(
    JSON_SORT_KEYS=False,
    JSONIFY_PRETTYPRINT_REGULAR=False,
    MAX_CONTENT_LENGTH=16 * 1024 * 1024,  # reject request bodies above 16 MiB
)

# Fresh temporary folder created once per process start.
UPLOAD_FOLDER = tempfile.mkdtemp()
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
logger.info(f"Using temporary upload folder: {UPLOAD_FOLDER}")

# Cross-origin policy applied to every route.
_cors_policy = {
    "origins": "*",
    "methods": ["GET", "POST", "OPTIONS"],
    "allow_headers": ["Content-Type", "Authorization", "Accept"],
    "expose_headers": ["Content-Type"],
}
CORS(app, resources={r"/*": _cors_policy})
|
|
|
|
|
@app.errorhandler(Exception)
def handle_exception(e):
    """Turn any otherwise-unhandled exception into a JSON error response.

    HTTP errors raised by Flask/Werkzeug (405, 413, ...) keep their own
    status code; everything else is logged with its traceback and reported
    as a 500.

    Args:
        e: The exception that escaped a view function.

    Returns:
        A ``(response, status_code)`` tuple with a JSON body containing
        ``error`` and ``message`` keys.
    """
    # Imported locally so this handler does not depend on the module header.
    from werkzeug.exceptions import HTTPException

    # A handler registered for Exception also receives HTTPException
    # subclasses that have no dedicated handler; those are client-facing
    # errors, not server faults, so preserve their status code.
    if isinstance(e, HTTPException):
        return jsonify({
            "error": e.name,
            "message": e.description
        }), e.code or 500

    app.logger.error(f"Unhandled exception: {str(e)}", exc_info=e)
    return jsonify({
        "error": "Internal server error",
        "message": str(e)
    }), 500
|
|
|
|
|
@app.errorhandler(404)
def not_found_error(error):
    """Answer 404 errors with a fixed JSON body instead of an HTML page."""
    payload = {
        "error": "Not found",
        "message": "The requested resource was not found",
    }
    return jsonify(payload), 404
|
|
|
|
|
@app.errorhandler(400)
def bad_request_error(error):
    """Answer 400 errors with JSON carrying the error's string form."""
    payload = {
        "error": "Bad request",
        "message": str(error),
    }
    return jsonify(payload), 400
|
|
|
@app.route('/')
def index():
    """Serve the ``index.html`` template as UTF-8 HTML.

    Any failure while rendering or building the response is reported as a
    JSON ``{"error": ...}`` body with status 500.
    """
    try:
        page = render_template('index.html')
        response = make_response(page)
        response.headers['Content-Type'] = 'text/html; charset=utf-8'
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
    return response
|
|
|
def new_value(value, old_min, old_max, new_min, new_max):
    """Linearly map ``value`` from [old_min, old_max] onto [new_min, new_max].

    Raises ZeroDivisionError when ``old_min`` equals ``old_max``.
    """
    source_span = old_max - old_min
    target_span = new_max - new_min
    offset = value - old_min
    # Multiply before dividing, exactly as the formula is conventionally written.
    scaled = (offset * target_span) / source_span
    return new_min + scaled
|
|
|
@app.route('/compute_answers', methods=['POST'])
def compute_answers():
    """Build the reference answers for a set of queries.

    Expected multipart form fields:
        query_file:   UTF-8 text file, one query per line (required).
        file_type:    "csv" or "pdf" (required).
        ans_csv_file: UTF-8 text file of answers (required for "csv").
        pdf_files[]:  one or more ``.pdf`` uploads (required for "pdf").

    Returns:
        ``({"answers": [...]}, 200)`` on success. For "csv" each line of the
        answer file is split on the literal two-character sequence ``\\n``.
        For "pdf" there is one list per query holding one generated answer
        per uploaded PDF. Validation problems yield ``({"error": ...}, 400)``
        and unexpected failures ``({"error": ...}, 500)``.
    """
    # Local import: shutil is only needed here for the PDF scratch cleanup.
    import shutil

    try:
        log_print("\n=== Starting Answer Computation ===")

        query_file = request.files.get('query_file')
        if not query_file:
            log_print("Missing query file", "ERROR")
            return jsonify({"error": "Missing query file"}), 400

        try:
            queries = query_file.read().decode('utf-8').splitlines()
        except UnicodeDecodeError:
            log_print("Invalid file encoding", "ERROR")
            return jsonify({"error": "Invalid file encoding"}), 400
        if not queries:
            log_print("No queries found in file", "ERROR")
            return jsonify({"error": "No queries found in file"}), 400
        log_print(f"Received queries: {queries}")

        file_type = request.form.get('file_type')
        if not file_type:
            log_print("Missing file type", "ERROR")
            return jsonify({"error": "Missing file type"}), 400

        if file_type == "csv":
            ans_csv_file = request.files.get('ans_csv_file')
            if not ans_csv_file:
                log_print("Missing answer CSV file", "ERROR")
                return jsonify({"error": "Missing answer CSV file"}), 400

            try:
                csv_lines = ans_csv_file.read().decode('utf-8').splitlines()
            except UnicodeDecodeError:
                log_print("Invalid CSV file encoding", "ERROR")
                return jsonify({"error": "Invalid CSV file encoding"}), 400

            # The separator is the literal backslash + "n", not a newline.
            c_answers = [line.split('\\n') for line in csv_lines]
            log_print(f"Processed CSV answers: {c_answers}")
            return jsonify({"answers": c_answers}), 200

        if file_type != 'pdf':
            log_print(f"Unsupported file type: {file_type}", "ERROR")
            return jsonify({"error": "Unsupported file type"}), 400

        pdf_dir = os.path.join(cache_dir, 'pdf_files')
        os.makedirs(pdf_dir, exist_ok=True)
        try:
            pdf_files = []
            for file in request.files.getlist('pdf_files[]'):
                if not (file.filename or '').endswith('.pdf'):
                    continue
                filename = secure_filename(file.filename)
                if not filename:
                    # secure_filename() can strip a name down to nothing;
                    # saving to "" would target the directory itself.
                    log_print(f"Skipping PDF with unusable name: {file.filename}", "WARNING")
                    continue
                filepath = os.path.join(pdf_dir, filename)
                file.save(filepath)
                pdf_files.append(filepath)

            if not pdf_files:
                log_print("No PDF files uploaded", "ERROR")
                return jsonify({"error": "No PDF files uploaded"}), 400

            log_print(f"Processing {len(pdf_files)} PDF files")

            # Index every PDF first, then answer each query against each PDF.
            for pdf_file in pdf_files:
                database_creation(pdf_file)

            c_answers = [
                [answer_generation(pdf_file, query) for pdf_file in pdf_files]
                for query in queries
            ]
        finally:
            # Always drop the saved uploads, including on early returns and
            # on failures inside database_creation/answer_generation.
            try:
                shutil.rmtree(pdf_dir)
            except Exception as e:
                log_print(f"Warning: Could not clean up PDF directory: {e}", "WARNING")

        log_print(f"Generated answers: {c_answers}")
        return jsonify({"answers": c_answers}), 200

    except Exception as e:
        log_print(f"Error in compute_answers: {str(e)}", "ERROR")
        # Strip non-ASCII characters from the message sent to the client.
        error_msg = str(e).encode('ascii', 'ignore').decode('ascii')
        return jsonify({"error": error_msg}), 500
|
|
|
def validate_folder_structure(files):
    """Check that uploads are grouped into student folders of equal size.

    Each item of ``files`` must expose a ``filename`` attribute holding a
    '/'-separated relative path; the second-to-last path component is taken
    as the student folder. Falsy items and items with an empty filename are
    ignored, as are paths with no folder component.

    Args:
        files: Iterable of upload objects (a single pass is made over it).

    Returns:
        ``(is_valid, message)``. ``is_valid`` is True only when at least one
        student folder exists and every folder holds the same number of
        files. Any unexpected error is reported as ``(False, message)``
        rather than raised.
    """
    try:
        # student folder -> number of files found in it
        file_counts = {}
        for file in files:
            if not file or not file.filename:
                continue
            path_parts = file.filename.split('/')
            if len(path_parts) >= 2:
                student = path_parts[-2]
                file_counts[student] = file_counts.get(student, 0) + 1

        if not file_counts:
            return False, "No valid student folders found"

        distinct_counts = set(file_counts.values())
        if len(distinct_counts) > 1:
            return False, "Inconsistent number of files across student folders"

        (files_per_student,) = distinct_counts
        return True, (
            f"Valid folder structure with {len(file_counts)} students "
            f"and {files_per_student} files each"
        )

    except Exception as e:
        return False, f"Error validating folder structure: {str(e)}"
|
|
|
def _ascii_only(text):
    """Return ``text`` with every non-ASCII character removed."""
    return text.encode('ascii', 'ignore').decode('ascii')


def _best_combined_score(extracted_text, correct_answers, max_tfidf, filepath):
    """Score ``extracted_text`` against every reference answer.

    Returns ``(best_score, errors)``: the highest weighted score obtained
    (0 when no answer scored above zero) and one ASCII-only error message
    for each reference answer whose scoring raised.
    """
    best_score = 0
    errors = []
    for i, correct_answer in enumerate(correct_answers):
        try:
            clean_correct_answer = _ascii_only(correct_answer)

            semantic_score = question_vector_sentence(extracted_text, clean_correct_answer)
            word_score = question_vector_word(extracted_text, clean_correct_answer)
            tfidf_score = tfidf_answer_score(extracted_text, clean_correct_answer, max_tfidf)
            ft_score = fasttext_similarity(extracted_text, clean_correct_answer)
            llm_marks = llm_score(extracted_text, clean_correct_answer)

            # Weighted blend of the five scorers (weights sum to 1.0).
            # NOTE(review): presumably every scorer returns a value in
            # [0, 1], since the caller rescales from that range - confirm.
            combined_score = (
                semantic_score * 0.3 +
                word_score * 0.2 +
                tfidf_score * 0.2 +
                ft_score * 0.2 +
                llm_marks * 0.1
            )

            log_print(f"Scores for answer {i+1}: semantic={semantic_score}, word={word_score}, tfidf={tfidf_score}, ft={ft_score}, llm={llm_marks}, combined={combined_score}")

            if combined_score > best_score:
                best_score = combined_score

        except Exception as score_error:
            error_msg = _ascii_only(str(score_error))
            log_print(f"Error calculating scores for {filepath}: {error_msg}", "ERROR")
            errors.append(error_msg)

    return best_score, errors


@app.route('/compute_marks', methods=['POST'])
def compute_marks():
    """Grade uploaded answer images against the supplied reference answers.

    Expected multipart form fields:
        correct_answers[]: one or more reference answers (required).
        file:              image uploads whose filename is a '/'-separated
                           path of the form ``.../<student>/<image>``.

    Each image is saved to a per-request temporary directory, its text is
    extracted, and the text is scored against every reference answer. The
    best score is rescaled from [0, 1] to [0, 5] and rounded to 2 decimals.

    Returns:
        JSON ``{"results": {student: {image: marks}}, "failed_files": [...]}``
        on success; ``{"error", "message"}`` with status 400 for invalid
        input or when nothing could be graded, and status 500 on unexpected
        failures.
    """
    # Local import: shutil is needed for the temp-directory cleanup below.
    import shutil

    try:
        correct_answers = request.form.getlist('correct_answers[]')
        log_print(f"Received correct answers: {correct_answers}")

        if not correct_answers:
            log_print("No correct answers provided", "ERROR")
            return jsonify({
                "error": "Missing data",
                "message": "No correct answers provided"
            }), 400

        try:
            max_tfidf = create_tfidf_values(correct_answers)
            log_print("Created TFIDF values successfully")
        except Exception as e:
            log_print(f"TFIDF error: {str(e)}", "ERROR")
            return jsonify({
                "error": "TFIDF error",
                "message": f"Error creating TFIDF values: {str(e)}"
            }), 400

        files = request.files.getlist('file')
        log_print(f"Received {len(files)} files")

        if not files:
            log_print("No files uploaded", "ERROR")
            return jsonify({
                "error": "Missing data",
                "message": "No files uploaded"
            }), 400

        is_valid, message = validate_folder_structure(files)
        log_print(f"Folder structure validation: {message}")

        if not is_valid:
            log_print(f"Invalid folder structure: {message}", "ERROR")
            return jsonify({
                "error": "Invalid folder structure",
                "message": message
            }), 400

        base_temp_dir = tempfile.mkdtemp()
        log_print(f"Created temporary directory: {base_temp_dir}")

        results = {}
        failed_files = []

        try:
            for file in files:
                try:
                    if not file or not file.filename:
                        log_print("Skipping invalid file", "WARNING")
                        continue

                    log_print(f"Processing file: {file.filename}")

                    path_parts = file.filename.split('/')
                    if len(path_parts) < 2:
                        log_print(f"Invalid file path structure: {file.filename}", "WARNING")
                        continue

                    student_folder = path_parts[-2]
                    filename = path_parts[-1]
                    log_print(f"Student folder: {student_folder}, Filename: {filename}")

                    if not is_valid_image_file(filename):
                        log_print(f"Invalid file type: {filename}", "WARNING")
                        failed_files.append({
                            "file": file.filename,
                            "error": "Invalid file type. Only .jpg, .jpeg, and .png files are allowed."
                        })
                        continue

                    if student_folder not in results:
                        results[student_folder] = {}
                        log_print(f"Initialized results for student: {student_folder}")

                    # Sanitize each client-supplied path COMPONENT, never the
                    # joined path: secure_filename() on a full path flattens
                    # the separators and yields a relative name, which would
                    # save the upload outside base_temp_dir.
                    safe_student = secure_filename(student_folder) or 'student'
                    extension = os.path.splitext(filename)[1].lower()
                    safe_filename = secure_filename(filename)
                    if os.path.splitext(safe_filename)[1].lower() != extension:
                        # Sanitizing removed the name or its (already
                        # validated) extension; fall back to a neutral name.
                        safe_filename = f"upload{extension}"

                    student_dir = os.path.join(base_temp_dir, safe_student)
                    os.makedirs(student_dir, exist_ok=True)
                    filepath = os.path.join(student_dir, safe_filename)
                    file.save(filepath)
                    log_print(f"Saved file to: {filepath}")

                    extracted_text = extract_text_from_image(filepath)
                    if not extracted_text:
                        log_print(f"No text extracted from: {filepath}", "WARNING")
                        failed_files.append({
                            "file": file.filename,
                            "error": "No text could be extracted from the image"
                        })
                        continue

                    log_print(f"Extracted text: {extracted_text[:100]}...")

                    # The scorers are fed ASCII-only text.
                    extracted_text = _ascii_only(extracted_text)

                    best_score, score_errors = _best_combined_score(
                        extracted_text, correct_answers, max_tfidf, filepath
                    )
                    for score_error in score_errors:
                        failed_files.append({
                            "file": file.filename,
                            "error": f"Error calculating scores: {score_error}"
                        })

                    # Rescale the best [0, 1] score to marks out of 5.
                    marks = new_value(best_score, 0, 1, 0, 5)
                    results[student_folder][filename] = round(marks, 2)
                    log_print(f"Assigned marks for {filename} in {student_folder}: {marks}")

                except Exception as e:
                    error_msg = _ascii_only(str(e))
                    log_print(f"Error processing file {file.filename}: {error_msg}", "ERROR")
                    failed_files.append({
                        "file": file.filename,
                        "error": error_msg
                    })
                    continue

        finally:
            # Remove the saved uploads whatever happened above.
            try:
                shutil.rmtree(base_temp_dir)
                log_print("Cleaned up temporary directory")
            except Exception as e:
                log_print(f"Warning: Could not clean up temporary directory: {e}", "WARNING")

        if not results:
            log_print("No results computed", "ERROR")
            return jsonify({
                "error": "Processing error",
                "message": "No results computed"
            }), 400

        # Strip non-ASCII characters from every name sent back to the client.
        clean_results = {}
        for student, scores in results.items():
            clean_results[_ascii_only(student)] = {
                _ascii_only(name): mark for name, mark in scores.items()
            }

        log_print(f"Final results: {clean_results}")

        response_data = {
            "results": clean_results,
            "failed_files": [{
                "file": _ascii_only(f["file"]),
                "error": _ascii_only(f["error"])
            } for f in failed_files]
        }

        log_print(f"Sending response: {response_data}")

        response = jsonify(response_data)
        response.headers['Content-Type'] = 'application/json'
        return response

    except Exception as e:
        error_msg = _ascii_only(str(e))
        log_print(f"Error in compute_marks: {error_msg}", "ERROR")
        return jsonify({
            "error": "Server error",
            "message": f"Error computing marks: {error_msg}"
        }), 500
|
|
|
@app.route('/check_logs')
def check_logs():
    """Return the full contents of the current log file as JSON.

    Returns:
        ``{"logs": <text>}`` with status 200 on success, or
        ``({"error": <message>}, 500)`` when the log file cannot be read.
    """
    # NOTE(review): this route exposes the raw application log to any
    # caller; consider restricting access.
    try:
        with open(log_file, 'r') as f:
            logs = f.read()
    except Exception as e:
        # Report the failure with an error status instead of an implicit 200
        # so clients can tell it apart from a successful read.
        return jsonify({"error": str(e)}), 500
    return jsonify({"logs": logs})
|
|
|
def is_valid_image_file(filename):
    """Tell whether ``filename`` ends in .jpg, .jpeg or .png (any letter case).

    Only the extension is inspected. Input that cannot be processed at all
    (for example ``None``) yields False instead of raising.
    """
    accepted = ('.jpg', '.jpeg', '.png')
    try:
        _, suffix = os.path.splitext(filename)
        return suffix.lower() in accepted
    except Exception:
        return False
|
|
|
def allowed_file(filename, allowed_extensions):
    """Tell whether ``filename`` has an extension listed in ``allowed_extensions``.

    The extension is the text after the last '.', compared in lower case.
    Names without any '.' are rejected.
    """
    _, dot, extension = filename.rpartition('.')
    if not dot:
        return False
    return extension.lower() in allowed_extensions
|
|
|
def cleanup_temp_files():
    """Delete every regular file in ``images_dir``, then re-ensure the directory.

    Best effort throughout: a file that cannot be deleted is logged as a
    warning and skipped, and any unexpected failure is logged as an error.
    The function never raises.
    """
    try:
        if os.path.exists(images_dir):
            # Open up the directory first so the deletions below can succeed.
            try:
                os.chmod(images_dir, 0o777)
            except Exception as e:
                log_print(f"Warning: Could not set directory permissions: {e}", "WARNING")

            for file in os.listdir(images_dir):
                file_path = os.path.join(images_dir, file)
                try:
                    if os.path.isfile(file_path):
                        try:
                            os.chmod(file_path, 0o666)
                        except OSError:
                            # Loosening the mode is only a courtesy; if it
                            # fails, unlink() below reports any real problem.
                            pass
                        os.unlink(file_path)
                except Exception as e:
                    log_print(f"Warning: Could not delete file {file_path}: {e}", "WARNING")
                    continue

            # Make sure the directory still exists with open permissions.
            ensure_directory(images_dir)

        log_print("Successfully cleaned up temporary files")
    except Exception as e:
        log_print(f"Error cleaning up temporary files: {e}", "ERROR")
|
|
|
if __name__ == '__main__':
    # Script entry point: make sure every working directory exists, serve
    # until the process stops, then clear the temporary images.
    try:
        for directory in (
            log_dir,
            cache_dir,
            nltk_data_dir,
            gensim_data_dir,
            upload_dir,
            ans_image_dir,
            images_dir,
        ):
            ensure_directory(directory)

        # Listen on $PORT when it is set, otherwise on 7860.
        port = int(os.environ.get('PORT', 7860))
        app.run(host='0.0.0.0', port=port, debug=False)
    finally:
        cleanup_temp_files()