Spaces:

yamanavijayavardhan
/

answer-grading-app

Sleeping

App Files Files Community

yamanavijayavardhan committed on Apr 2

Commit

8434b5d

1 Parent(s): 44fb620

update_new_new_new_new_new

Browse files

Files changed (2) hide show

HTR/strike.py +58 -53
similarity_check/semantic_meaning_check/semantic.py +87 -28

HTR/strike.py CHANGED Viewed

@@ -149,22 +149,70 @@ def process_without_model(image_paths):
         logger.error(f"Error in process_without_model: {str(e)}")
         return np.zeros(len(image_paths))  # Return all as not struck
 def struck_images(image_paths):
     try:
         if not image_paths:
             logger.error("No image paths provided")
             return []
         logger.info(f"Processing {len(image_paths)} images")
-        processed_paths = []
-        # Initialize model
-        model = initialize_model()
         for i, img_path in enumerate(image_paths):
             try:
-                # Read the image from the path
-                img = cv2.imread(img_path)
                 if img is None:
                     logger.error(f"Failed to read image: {img_path}")
                     continue
@@ -173,29 +221,15 @@ def struck_images(image_paths):
                 processed = process_single_image(img)
                 if processed is None:
                     continue
-                # Save the processed image
-                output_path = os.path.join(TEMP_IMAGES_DIR, f'processed_{i}.png')
-                cv2.imwrite(output_path, processed)
-                processed_paths.append(output_path)
             except Exception as e:
                 logger.error(f"Error processing image {img_path}: {str(e)}")
                 continue
-        # Get predictions
-        predictions = predict_image(processed_paths, model)
-        # Filter based on predictions
-        not_struck = []
-        for i, pred in enumerate(predictions):
-            if isinstance(pred, np.ndarray):
-                if pred.argmax() == 0:  # Not struck
-                    not_struck.append(processed_paths[i])
-            else:
-                if pred == 0:  # Not struck
-                    not_struck.append(processed_paths[i])
         logger.info(f"Found {len(not_struck)} non-struck images")
         return not_struck
@@ -203,33 +237,4 @@ def struck_images(image_paths):
         logger.error(f"Error in struck_images: {str(e)}")
         return []
-def process_single_image(img):
-    try:
-        # Convert to grayscale if needed
-        if len(img.shape) == 3:
-            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # Enhance contrast
-        img = cv2.equalizeHist(img)
-        # Denoise
-        img = cv2.fastNlMeansDenoising(img)
-        # Apply adaptive thresholding
-        binary = cv2.adaptiveThreshold(
-            img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-            cv2.THRESH_BINARY, 21, 15
-        )
-        # Remove noise and smooth edges
-        kernel = np.ones((3,3), np.uint8)
-        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
-        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
-        return binary
-    except Exception as e:
-        logger.error(f"Error in process_single_image: {str(e)}")
-        return None
 # struck_images()

         logger.error(f"Error in process_without_model: {str(e)}")
         return np.zeros(len(image_paths))  # Return all as not struck
+def process_single_image(img):
+    try:
+        # Convert to grayscale if needed
+        if len(img.shape) == 3:
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # Enhance contrast
+        img = cv2.equalizeHist(img)
+        # Denoise
+        img = cv2.fastNlMeansDenoising(img)
+        # Apply adaptive thresholding
+        binary = cv2.adaptiveThreshold(
+            img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY, 21, 15
+        )
+        # Remove noise and smooth edges
+        kernel = np.ones((3,3), np.uint8)
+        binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
+        binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
+        return binary
+    except Exception as e:
+        logger.error(f"Error in process_single_image: {str(e)}")
+        return None
+def check_strike_through(img):
+    """Check if an image contains strike-through lines"""
+    try:
+        # Convert to binary
+        _, binary = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+        # Look for horizontal lines
+        horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 1))
+        horizontal_lines = cv2.morphologyEx(binary, cv2.MORPH_OPEN, horizontal_kernel)
+        # Count pixels in horizontal lines
+        line_pixels = np.sum(horizontal_lines == 255)
+        total_pixels = img.shape[0] * img.shape[1]
+        # If more than 5% of pixels are part of horizontal lines, consider it struck through
+        return (line_pixels / total_pixels) > 0.05
+    except Exception as e:
+        logger.error(f"Error checking strike-through: {str(e)}")
+        return False
 def struck_images(image_paths):
+    """Process images and detect which ones are not struck through"""
     try:
         if not image_paths:
             logger.error("No image paths provided")
             return []
         logger.info(f"Processing {len(image_paths)} images")
+        not_struck = []
         for i, img_path in enumerate(image_paths):
             try:
+                # Read the image
+                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                 if img is None:
                     logger.error(f"Failed to read image: {img_path}")
                     continue
                 processed = process_single_image(img)
                 if processed is None:
                     continue
+                # Check if image is struck through
+                if not check_strike_through(processed):
+                    not_struck.append(img_path)
             except Exception as e:
                 logger.error(f"Error processing image {img_path}: {str(e)}")
                 continue
         logger.info(f"Found {len(not_struck)} non-struck images")
         return not_struck
         logger.error(f"Error in struck_images: {str(e)}")
         return []
 # struck_images()

similarity_check/semantic_meaning_check/semantic.py CHANGED Viewed

@@ -13,6 +13,93 @@ import os
 import tempfile
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
 from all_models import models
 # Use custom directory for gensim data
 gensim_data_dir = os.getenv('GENSIM_DATA_DIR', tempfile.gettempdir())
@@ -39,27 +126,6 @@ except Exception as e:
 # nltk.download('punkt')
 # nltk.download('stopwords')
-def question_vector_sentence(correct_answer):
-    try:
-        return models.similarity_model.encode(correct_answer, convert_to_tensor=True)
-    except Exception as e:
-        print(f"Error in question_vector_sentence: {str(e)}")
-        return None
-def similarity_model_score(correct_answer_vector, answer):
-    try:
-        if correct_answer_vector is None:
-            return 0.0
-        answer_embedding = models.similarity_model.encode(answer, convert_to_tensor=True)
-        cosine_score = float('-inf')
-        for i in correct_answer_vector:
-            cosine_score = max(cosine_score, util.pytorch_cos_sim(i, answer_embedding))
-        return float(cosine_score)  # Convert to float for JSON serialization
-    except Exception as e:
-        print(f"Error in similarity_model_score: {str(e)}")
-        return 0.0
 def preprocess(sentence):
     try:
         # Lowercase and remove punctuation
@@ -104,13 +170,6 @@ def compute_scm(tokens1, tokens2, model):
         print(f"Error in compute_scm: {str(e)}")
         return 0.5  # Return default similarity score
-def question_vector_word(correct_answer):
-    try:
-        return preprocess(correct_answer)
-    except Exception as e:
-        print(f"Error in question_vector_word: {str(e)}")
-        return []
 def fasttext_similarity(correct_answer_vector, answer):
     try:
         preprocess_answer = preprocess(answer)

 import tempfile
 sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
 from all_models import models
+from sentence_transformers import SentenceTransformer
+import torch
+import logging
+# Set up logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Global model variable
+model = None
+def initialize_model():
+    global model
+    try:
+        # Use a smaller, more efficient model
+        model_name = 'paraphrase-MiniLM-L6-v2'  # Only about 80MB
+        cache_dir = os.path.join(os.environ.get('TMPDIR', '/tmp'), 'model_cache')
+        os.makedirs(cache_dir, exist_ok=True)
+        model = SentenceTransformer(model_name, cache_folder=cache_dir)
+        logger.info(f"Loaded model: {model_name}")
+        return model
+    except Exception as e:
+        logger.error(f"Error loading model: {str(e)}")
+        return None
+def get_sentence_embedding(text):
+    try:
+        global model
+        if model is None:
+            model = initialize_model()
+        if model is None:
+            return None
+        # Get embeddings
+        embedding = model.encode(text, convert_to_tensor=True)
+        return embedding
+    except Exception as e:
+        logger.error(f"Error getting embedding: {str(e)}")
+        return None
+def similarity_model_score(student_answer, correct_answer):
+    try:
+        # Get embeddings
+        student_emb = get_sentence_embedding(student_answer)
+        correct_emb = get_sentence_embedding(correct_answer)
+        if student_emb is None or correct_emb is None:
+            return 0.0
+        # Calculate cosine similarity
+        similarity = torch.nn.functional.cosine_similarity(student_emb, correct_emb, dim=0)
+        return float(similarity)
+    except Exception as e:
+        logger.error(f"Error calculating similarity: {str(e)}")
+        return 0.0
+def question_vector_sentence(student_answer, correct_answer):
+    try:
+        return similarity_model_score(student_answer, correct_answer)
+    except Exception as e:
+        logger.error(f"Error in question_vector_sentence: {str(e)}")
+        return 0.0
+def question_vector_word(student_answer, correct_answer):
+    try:
+        # Split into words
+        student_words = student_answer.lower().split()
+        correct_words = correct_answer.lower().split()
+        # Calculate similarities for each word pair
+        similarities = []
+        for s_word in student_words:
+            word_sims = []
+            for c_word in correct_words:
+                sim = similarity_model_score(s_word, c_word)
+                word_sims.append(sim)
+            if word_sims:
+                similarities.append(max(word_sims))
+        # Return average similarity
+        if similarities:
+            return sum(similarities) / len(similarities)
+        return 0.0
+    except Exception as e:
+        logger.error(f"Error in question_vector_word: {str(e)}")
+        return 0.0
 # Use custom directory for gensim data
 gensim_data_dir = os.getenv('GENSIM_DATA_DIR', tempfile.gettempdir())
 # nltk.download('punkt')
 # nltk.download('stopwords')
 def preprocess(sentence):
     try:
         # Lowercase and remove punctuation
         print(f"Error in compute_scm: {str(e)}")
         return 0.5  # Return default similarity score
 def fasttext_similarity(correct_answer_vector, answer):
     try:
         preprocess_answer = preprocess(answer)