Spaces:

yamanavijayavardhan
/

answer-grading-app

Sleeping

App Files Files Community

yamanavijayavardhan committed on Apr 1

Commit

666fb5d

1 Parent(s): 43ce63b

update_

Browse files

Files changed (2) hide show

README.md +45 -2
main.py +128 -44

README.md CHANGED Viewed

@@ -6,7 +6,50 @@ colorTo: green
 sdk: docker
 pinned: false
 license: apache-2.0
-short_description: A application for automated descriptive answer evaluation
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 sdk: docker
 pinned: false
 license: apache-2.0
+app_port: 7860
 ---
+# Answer Grading App
+This is an AI-powered answer grading application that can grade student answers using multiple similarity metrics and LLM-based scoring.
+## Features
+- Support for PDF and CSV input formats
+- Multiple similarity metrics:
+  - TF-IDF scoring
+  - Semantic similarity
+  - Word-level similarity
+  - LLM-based scoring
+- Handwritten text recognition (HTR) support
+- Rate limiting and cross-origin (CORS) request support
+- Comprehensive logging
+## Usage
+1. Upload your query file (a UTF-8 text file with one question per line)
+2. Choose input format (PDF or CSV)
+3. Upload answer files (PDFs or CSV)
+4. Get graded results
+## API Endpoints
+- `/`: Main interface
+- `/compute_answers`: Process and generate answers
+- `/compute_marks`: Calculate marks for student answers
+- `/check_logs`: View application logs
+## Environment Variables
+- `PORT`: Server port (default: 7860)
+- `HF_HOME`: Hugging Face cache directory
+- `GENSIM_DATA_DIR`: Gensim data directory
+## Rate Limits
+- 200 requests per day per client address
+- 50 requests per hour per client address
+## Dependencies
+See `requirements.txt` for the full list of dependencies.

main.py CHANGED Viewed

@@ -5,18 +5,13 @@ import logging
 import sys
 import builtins
 from datetime import datetime
-# Set up logging to write to stdout
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(message)s',  # Simplified format for better readability
-    handlers=[
-        logging.StreamHandler(sys.stdout)
-    ]
-)
-# Create a custom logger
-logger = logging.getLogger(__name__)
 # Create a logs directory in the temp folder
 log_dir = os.path.join(tempfile.gettempdir(), 'app_logs')
@@ -25,7 +20,7 @@ os.makedirs(log_dir, exist_ok=True)
 # Create a log file with timestamp
 log_file = os.path.join(log_dir, f'app_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
-# Set up logging to both file and console
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
@@ -35,6 +30,9 @@ logging.basicConfig(
     ]
 )
 # Add a print function that also logs
 def log_print(message, level="INFO"):
     # Use the original print function to avoid recursion
@@ -50,30 +48,33 @@ def log_print(message, level="INFO"):
 cache_dir = tempfile.mkdtemp()
 nltk_data_dir = os.path.join(cache_dir, 'nltk_data')
 gensim_data_dir = os.path.join(cache_dir, 'gensim-data')
-upload_dir = os.path.join(cache_dir, 'uploads')  # New upload directory in /tmp
-ans_image_dir = os.path.join(cache_dir, 'ans_image')  # Move ans_image to temp directory
 # Set environment variables
 os.environ['HF_HOME'] = cache_dir
 os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
 # Create directories with correct permissions
-os.makedirs(nltk_data_dir, exist_ok=True)
-os.makedirs(gensim_data_dir, exist_ok=True)
-os.makedirs(upload_dir, exist_ok=True)
-os.makedirs(ans_image_dir, exist_ok=True)  # Create ans_image in temp directory
 # Add the custom directory to NLTK's search path
 nltk.data.path.insert(0, nltk_data_dir)
 # Download required NLTK data
-try:
-    log_print("Downloading required NLTK data...")
-    nltk.download('stopwords', download_dir=nltk_data_dir)
-    nltk.download('punkt', download_dir=nltk_data_dir)
-    nltk.download('wordnet', download_dir=nltk_data_dir)
-except Exception as e:
-    log_print(f"Error downloading NLTK data: {e}", "ERROR")
 from flask import Flask, request, jsonify, render_template
 import json
@@ -91,6 +92,22 @@ app = Flask(__name__)
 # Use the new upload directory
 UPLOAD_FOLDER = upload_dir
 @app.route('/')
 def index():
     return render_template('index.html')
@@ -109,10 +126,21 @@ def compute_answers():
             log_print("Missing query file", "ERROR")
             return jsonify({"error": "Missing query file"}), 400
-        queries = query_file.read().decode('utf-8').splitlines()
-        log_print(f"Received queries: {queries}")
         file_type = request.form.get('file_type')
         ans_csv_file = request.files.get('ans_csv_file')
         if file_type == "csv":
@@ -120,12 +148,16 @@ def compute_answers():
                 log_print("Missing answer CSV file", "ERROR")
                 return jsonify({"error": "Missing answer CSV file"}), 400
-            ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
-            c_answers = []
-            for i in ans_csv_file:
-                c_answers.append(i.split('\\n'))
-            log_print(f"Processed CSV answers: {c_answers}")
-            return jsonify({"answers": c_answers}), 200
         c_answers = []
@@ -189,18 +221,34 @@ def compute_marks():
             log_print("No answers provided", "ERROR")
             return jsonify({"error": "No answers provided"}), 400
-        log_print("=== Processing Answers ===")
-        log_print(f"Received answers: {a}")
-        a = json.loads(a)
-        answers = []
-        for i in a:
-            ans = i.split('\n\n')
-            answers.append(ans)
-        log_print(f"Processed answers structure: {answers}")
         # Initialize data structure and parent folder
         data = {}
-        parent_folder = "ans_image"  # Changed from cache_dir to match reference code
         # Check if answers exist
         if not answers:
@@ -212,7 +260,7 @@ def compute_marks():
             student_path = os.path.join(parent_folder, student_folder)
             if os.path.isdir(student_path):
                 for image_file in os.listdir(student_path):
-                    if image_file.endswith(('.jpg')):  # Changed to match reference code
                         full_path = os.path.join(student_path, image_file).replace("\\", "/")
                         if student_folder in data:
                             data[student_folder].append(full_path)
@@ -266,6 +314,14 @@ def compute_marks():
         for student, marks_list in s_marks.items():
             log_print(f"{student}: {marks_list}")
         return jsonify({"message": s_marks}), 200
     except Exception as e:
@@ -326,5 +382,33 @@ def check_logs():
     except Exception as e:
         return jsonify({"error": str(e)})
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=7860)

 import sys
 import builtins
 from datetime import datetime
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address
+from flask_cors import CORS
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
 # Create a logs directory in the temp folder
 log_dir = os.path.join(tempfile.gettempdir(), 'app_logs')
 # Create a log file with timestamp
 log_file = os.path.join(log_dir, f'app_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
+# Set up logging to both file and console (single configuration)
 logging.basicConfig(
     level=logging.INFO,
     format='%(asctime)s - %(levelname)s - %(message)s',
     ]
 )
+# Create a custom logger
+logger = logging.getLogger(__name__)
 # Add a print function that also logs
 def log_print(message, level="INFO"):
     # Use the original print function to avoid recursion
 cache_dir = tempfile.mkdtemp()
 nltk_data_dir = os.path.join(cache_dir, 'nltk_data')
 gensim_data_dir = os.path.join(cache_dir, 'gensim-data')
+upload_dir = os.path.join(cache_dir, 'uploads')
+ans_image_dir = os.path.join(cache_dir, 'ans_image')
 # Set environment variables
 os.environ['HF_HOME'] = cache_dir
 os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
 # Create directories with correct permissions
+for directory in [nltk_data_dir, gensim_data_dir, upload_dir, ans_image_dir]:
+    try:
+        os.makedirs(directory, exist_ok=True)
+    except Exception as e:
+        log_print(f"Error creating directory {directory}: {e}", "ERROR")
+        raise
 # Add the custom directory to NLTK's search path
 nltk.data.path.insert(0, nltk_data_dir)
 # Download required NLTK data
+required_nltk_data = ['stopwords', 'punkt', 'wordnet']
+for data in required_nltk_data:
+    try:
+        log_print(f"Downloading NLTK data: {data}")
+        nltk.download(data, download_dir=nltk_data_dir)
+    except Exception as e:
+        log_print(f"Error downloading NLTK data {data}: {e}", "ERROR")
+        raise
 from flask import Flask, request, jsonify, render_template
 import json
 # Use the new upload directory
 UPLOAD_FOLDER = upload_dir
+# Configure CORS for Hugging Face
+CORS(app, resources={
+    r"/*": {
+        "origins": ["*"],
+        "methods": ["GET", "POST", "OPTIONS"],
+        "allow_headers": ["Content-Type", "Authorization"]
+    }
+})
+# Initialize rate limiter
+limiter = Limiter(
+    app=app,
+    key_func=get_remote_address,
+    default_limits=["200 per day", "50 per hour"]
+)
 @app.route('/')
 def index():
     return render_template('index.html')
             log_print("Missing query file", "ERROR")
             return jsonify({"error": "Missing query file"}), 400
+        try:
+            queries = query_file.read().decode('utf-8').splitlines()
+            if not queries:
+                log_print("No queries found in file", "ERROR")
+                return jsonify({"error": "No queries found in file"}), 400
+            log_print(f"Received queries: {queries}")
+        except UnicodeDecodeError:
+            log_print("Invalid file encoding", "ERROR")
+            return jsonify({"error": "Invalid file encoding"}), 400
         file_type = request.form.get('file_type')
+        if not file_type:
+            log_print("Missing file type", "ERROR")
+            return jsonify({"error": "Missing file type"}), 400
         ans_csv_file = request.files.get('ans_csv_file')
         if file_type == "csv":
                 log_print("Missing answer CSV file", "ERROR")
                 return jsonify({"error": "Missing answer CSV file"}), 400
+            try:
+                ans_csv_file = ans_csv_file.read().decode('utf-8').splitlines()
+                c_answers = []
+                for i in ans_csv_file:
+                    c_answers.append(i.split('\\n'))
+                log_print(f"Processed CSV answers: {c_answers}")
+                return jsonify({"answers": c_answers}), 200
+            except UnicodeDecodeError:
+                log_print("Invalid CSV file encoding", "ERROR")
+                return jsonify({"error": "Invalid CSV file encoding"}), 400
         c_answers = []
             log_print("No answers provided", "ERROR")
             return jsonify({"error": "No answers provided"}), 400
+        try:
+            log_print("=== Processing Answers ===")
+            log_print(f"Received answers: {a}")
+            a = json.loads(a)
+            answers = []
+            for i in a:
+                ans = i.split('\n\n')
+                answers.append(ans)
+            log_print(f"Processed answers structure: {answers}")
+        except json.JSONDecodeError:
+            log_print("Invalid JSON format in answers", "ERROR")
+            return jsonify({"error": "Invalid JSON format in answers"}), 400
+        # Add validation for answers
+        def validate_answers(answers):
+            if not isinstance(answers, list):
+                return False
+            if not all(isinstance(ans, str) for ans in answers):
+                return False
+            return True
+        if not validate_answers(answers):
+            log_print("Invalid answer format", "ERROR")
+            return jsonify({"error": "Invalid answer format"}), 400
         # Initialize data structure and parent folder
         data = {}
+        parent_folder = ans_image_dir  # Use the temp directory path defined earlier
         # Check if answers exist
         if not answers:
             student_path = os.path.join(parent_folder, student_folder)
             if os.path.isdir(student_path):
                 for image_file in os.listdir(student_path):
+                    if image_file.endswith('.jpg'):  # Correct syntax for single extension
                         full_path = os.path.join(student_path, image_file).replace("\\", "/")
                         if student_folder in data:
                             data[student_folder].append(full_path)
         for student, marks_list in s_marks.items():
             log_print(f"{student}: {marks_list}")
+        # Add cleanup at the end
+        try:
+            import shutil
+            shutil.rmtree(ans_image_dir)
+            os.makedirs(ans_image_dir, exist_ok=True)
+        except Exception as e:
+            log_print(f"Warning: Could not clean up ans_image directory: {e}", "WARNING")
         return jsonify({"message": s_marks}), 200
     except Exception as e:
     except Exception as e:
         return jsonify({"error": str(e)})
+# Add file type validation
+def is_valid_image_file(filename):
+    valid_extensions = {'.jpg', '.jpeg', '.png'}
+    return os.path.splitext(filename)[1].lower() in valid_extensions
+def allowed_file(filename, allowed_extensions):
+    return '.' in filename and \
+           filename.rsplit('.', 1)[1].lower() in allowed_extensions
+def cleanup_temp_files():
+    """Clean up temporary files and directories"""
+    try:
+        import shutil
+        temp_dirs = [ans_image_dir, upload_dir, nltk_data_dir, gensim_data_dir]
+        for directory in temp_dirs:
+            if os.path.exists(directory):
+                shutil.rmtree(directory)
+                os.makedirs(directory, exist_ok=True)
+        log_print("Successfully cleaned up temporary files")
+    except Exception as e:
+        log_print(f"Error cleaning up temporary files: {e}", "ERROR")
 if __name__ == '__main__':
+    try:
+        # Get port from environment variable or use default
+        port = int(os.environ.get('PORT', 7860))
+        # Use 0.0.0.0 for Hugging Face
+        app.run(host='0.0.0.0', port=port)
+    finally:
+        cleanup_temp_files()