Spaces:

yamanavijayavardhan
/

answer-grading-app

Sleeping

App Files Files Community

yamanavijayavardhan committed on Apr 6

Commit

94589be

1 Parent(s): 6139662

printing extracted text19

Browse files

Files changed (3) hide show

all_models.py +27 -23
main.py +13 -9
similarity_check/semantic_meaning_check/semantic.py +48 -20

all_models.py CHANGED Viewed

@@ -124,27 +124,30 @@ class ModelSingleton:
         """Get ViT model with reference counting"""
         try:
             if self.vit_model is None:
-                from transformers import ViTImageProcessor, ViTModel, AutoModelForImageClassification, AutoConfig
                 logger.info("Loading ViT model...")
-                # Get model path - fix to use project root
                 import os
-                project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
                 model_path = os.path.join(project_root, 'models', 'vit-base-beans')
                 logger.info(f"Looking for model at: {model_path}")
                 if not os.path.exists(model_path):
                     raise FileNotFoundError(f"Model path does not exist: {model_path}")
                 # Check for model files
                 model_files = os.listdir(model_path)
-                logger.info(f"Found model files: {model_files}")
-                if 'model.safetensors' not in model_files or 'config.json' not in model_files:
-                    raise FileNotFoundError(f"Required model files missing in {model_path}")
-                # Create processor with explicit settings
                 self.vit_processor = ViTImageProcessor(
                     do_resize=True,
                     size=224,
@@ -154,34 +157,35 @@ class ModelSingleton:
                 )
                 try:
-                    # Load model with explicit settings
-                    logger.info("Loading ViT model with safetensors...")
                     self.vit_model = ViTModel.from_pretrained(
                         model_path,
                         local_files_only=True,
                         use_safetensors=True,
                         trust_remote_code=False
                     )
-                    self.vit_model.to(self.device)
-                    self.vit_model.eval()
-                    logger.info("ViT model loaded successfully")
-                except Exception as model_error:
-                    logger.error(f"Error loading model: {model_error}")
-                    # Try alternative loading method
                     try:
-                        logger.info("Attempting alternative model loading...")
                         self.vit_model = AutoModelForImageClassification.from_pretrained(
                             model_path,
                             local_files_only=True,
                             use_safetensors=True,
                             trust_remote_code=False
                         )
-                        self.vit_model.to(self.device)
-                        self.vit_model.eval()
-                        logger.info("ViT model loaded successfully using alternative method")
-                    except Exception as alt_error:
-                        logger.error(f"Alternative loading also failed: {alt_error}")
-                        raise
             self._reference_counts['vit'] += 1
             return self.vit_model, self.vit_processor

         """Get ViT model with reference counting"""
         try:
             if self.vit_model is None:
+                from transformers import ViTImageProcessor, ViTModel, AutoModelForImageClassification
                 logger.info("Loading ViT model...")
+                # Get model path - fix to use absolute path
                 import os
+                current_file_path = os.path.abspath(__file__)
+                project_root = os.path.dirname(os.path.dirname(current_file_path))
                 model_path = os.path.join(project_root, 'models', 'vit-base-beans')
                 logger.info(f"Looking for model at: {model_path}")
                 if not os.path.exists(model_path):
                     raise FileNotFoundError(f"Model path does not exist: {model_path}")
                 # Check for model files
                 model_files = os.listdir(model_path)
+                logger.info(f"Found model files: {', '.join(model_files)}")
+                if 'model.safetensors' not in model_files:
+                    raise FileNotFoundError(f"model.safetensors not found in {model_path}")
+                if 'config.json' not in model_files:
+                    raise FileNotFoundError(f"config.json not found in {model_path}")
+                # Create processor
                 self.vit_processor = ViTImageProcessor(
                     do_resize=True,
                     size=224,
                 )
                 try:
+                    # First try loading as ViTModel
+                    logger.info("Attempting to load as ViTModel...")
                     self.vit_model = ViTModel.from_pretrained(
                         model_path,
                         local_files_only=True,
                         use_safetensors=True,
                         trust_remote_code=False
                     )
+                except Exception as e1:
+                    logger.warning(f"Failed to load as ViTModel: {e1}")
                     try:
+                        # Try loading as AutoModelForImageClassification
+                        logger.info("Attempting to load as AutoModelForImageClassification...")
                         self.vit_model = AutoModelForImageClassification.from_pretrained(
                             model_path,
                             local_files_only=True,
                             use_safetensors=True,
                             trust_remote_code=False
                         )
+                    except Exception as e2:
+                        logger.error(f"Failed to load model using both methods")
+                        logger.error(f"ViTModel error: {e1}")
+                        logger.error(f"AutoModel error: {e2}")
+                        raise Exception("Failed to load model using any available method")
+                # Move model to device and set to eval mode
+                self.vit_model.to(self.device)
+                self.vit_model.eval()
+                logger.info(f"ViT model loaded successfully and moved to {self.device}")
             self._reference_counts['vit'] += 1
             return self.vit_model, self.vit_processor

main.py CHANGED Viewed

@@ -34,24 +34,28 @@ def log_print(message, level="INFO"):
     })
 # Set environment variables before any other imports
-os.environ['TRANSFORMERS_CACHE'] = os.path.join(tempfile.gettempdir(), 'huggingface_cache')
-os.environ['HF_HOME'] = os.path.join(tempfile.gettempdir(), 'huggingface')
-os.environ['TORCH_HOME'] = os.path.join(tempfile.gettempdir(), 'torch')
-os.environ['XDG_CACHE_HOME'] = os.path.join(tempfile.gettempdir(), 'cache')
-os.environ['SENTENCE_TRANSFORMERS_HOME'] = os.path.join(tempfile.gettempdir(), 'sentence_transformers')
-# Create all necessary cache directories
 cache_dirs = {
     'transformers': os.environ['TRANSFORMERS_CACHE'],
     'hf': os.environ['HF_HOME'],
     'torch': os.environ['TORCH_HOME'],
     'cache': os.environ['XDG_CACHE_HOME'],
-    'sentence_transformers': os.environ['SENTENCE_TRANSFORMERS_HOME']
 }
 for cache_name, cache_dir in cache_dirs.items():
     try:
-        os.makedirs(cache_dir, exist_ok=True)
         log_print(f"Created cache directory for {cache_name}: {cache_dir}")
     except Exception as e:
         log_print(f"Error creating {cache_name} cache directory: {e}", "ERROR")

     })
 # Set environment variables before any other imports
+cache_root = os.path.join(os.path.expanduser('~'), '.cache', 'answer_grading_app')
+os.environ['TRANSFORMERS_CACHE'] = os.path.join(cache_root, 'transformers')
+os.environ['HF_HOME'] = os.path.join(cache_root, 'huggingface')
+os.environ['TORCH_HOME'] = os.path.join(cache_root, 'torch')
+os.environ['XDG_CACHE_HOME'] = os.path.join(cache_root, 'cache')
+os.environ['SENTENCE_TRANSFORMERS_HOME'] = os.path.join(cache_root, 'sentence_transformers')
+os.environ['GENSIM_DATA_DIR'] = os.path.join(cache_root, 'gensim')
+# Create all necessary cache directories with proper permissions
 cache_dirs = {
+    'root': cache_root,
     'transformers': os.environ['TRANSFORMERS_CACHE'],
     'hf': os.environ['HF_HOME'],
     'torch': os.environ['TORCH_HOME'],
     'cache': os.environ['XDG_CACHE_HOME'],
+    'sentence_transformers': os.environ['SENTENCE_TRANSFORMERS_HOME'],
+    'gensim': os.environ['GENSIM_DATA_DIR']
 }
 for cache_name, cache_dir in cache_dirs.items():
     try:
+        os.makedirs(cache_dir, mode=0o755, exist_ok=True)
         log_print(f"Created cache directory for {cache_name}: {cache_dir}")
     except Exception as e:
         log_print(f"Error creating {cache_name} cache directory: {e}", "ERROR")

similarity_check/semantic_meaning_check/semantic.py CHANGED Viewed

@@ -15,6 +15,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
 from all_models import models
 import torch
 import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -24,24 +25,51 @@ logger = logging.getLogger(__name__)
 gensim_data_dir = os.getenv('GENSIM_DATA_DIR', tempfile.gettempdir())
 os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
-# Load fasttext with error handling
-try:
-    model_path = os.path.join(gensim_data_dir, 'fasttext-wiki-news-subwords-300', 'fasttext-wiki-news-subwords-300.gz')
-    if os.path.exists(model_path):
-        print("Loading fasttext model from cache...")
-        fasttext = KeyedVectors.load_word2vec_format(model_path)
-    else:
-        print("Loading fasttext model...")
-        fasttext = load('fasttext-wiki-news-subwords-300')
-except Exception as e:
-    print(f"Error loading fasttext model: {e}")
-    # Provide a fallback for similarity calculations
-    class DummyFasttext:
-        def __getitem__(self, word):
-            return np.zeros(300)  # Return zero vector of size 300
-        def __contains__(self, word):
-            return True
-    fasttext = DummyFasttext()
 def question_vector_sentence(correct_answer):
     """Get sentence embedding using shared model"""
@@ -143,8 +171,8 @@ def question_vector_word(correct_answer):
         # Get word embeddings
         embeddings = []
         for word in words:
-            if word in fasttext:
-                embeddings.append(fasttext[word])
         if not embeddings:
             return np.zeros(300)  # Return zero vector if no valid words

 from all_models import models
 import torch
 import logging
+from utils import log_print
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 gensim_data_dir = os.getenv('GENSIM_DATA_DIR', tempfile.gettempdir())
 os.environ['GENSIM_DATA_DIR'] = gensim_data_dir
def load_fasttext_model():
    """Load the FastText word vectors, falling back to a dummy model on failure.

    The vectors are looked up under the directory named by the
    ``GENSIM_DATA_DIR`` environment variable (default
    ``~/.cache/gensim-data``), at
    ``fasttext-wiki-news-subwords-300/fasttext-wiki-news-subwords-300.gz``.
    If that file exists it is loaded with
    ``KeyedVectors.load_word2vec_format``; otherwise the model is fetched via
    ``gensim.downloader`` and then written to that path for later runs.

    Writing the cache file is best-effort: if it fails, the freshly
    downloaded model is still returned instead of being thrown away.

    Returns:
        The loaded vectors, or a ``DummyFasttext`` instance when loading or
        downloading raised an exception.
    """
    model_dir = os.getenv('GENSIM_DATA_DIR', os.path.expanduser('~/.cache/gensim-data'))
    model_path = os.path.join(
        model_dir,
        'fasttext-wiki-news-subwords-300',
        'fasttext-wiki-news-subwords-300.gz',
    )

    try:
        if os.path.exists(model_path):
            log_print("Loading fasttext model from cache...")
            return KeyedVectors.load_word2vec_format(model_path)

        log_print("Downloading fasttext model...")
        import gensim.downloader as api
        model = api.load('fasttext-wiki-news-subwords-300')
    except Exception as e:
        log_print(f"Error loading fasttext model: {str(e)}", "ERROR")
        # Return a dummy model that provides basic word vector functionality
        return DummyFasttext()

    # Caching is an optimisation only: a read-only or full disk must not
    # cost us the model we just downloaded.
    try:
        os.makedirs(os.path.dirname(model_path), exist_ok=True)
        model.save_word2vec_format(model_path)
    except Exception as save_error:
        log_print(
            f"Could not cache fasttext model at {model_path}: {save_error}",
            "WARNING",
        )
        # Remove a partially written file so the next start does not try to
        # load a corrupt cache.
        try:
            os.remove(model_path)
        except OSError:
            pass
    return model
class DummyFasttext:
    """Fallback used when the FastText model fails to load.

    It mimics the small part of the word-vector interface this module relies
    on: membership tests, item lookup, ``get_vector`` and ``most_similar``.
    Every word maps to an all-zero vector of length ``vector_size``.
    """

    def __init__(self):
        # Dimensionality of the zero vectors handed out for every word.
        self.vector_size = 300
        log_print("Using dummy FastText model due to loading error", "WARNING")

    def get_vector(self, word):
        """Return a zero vector of length ``vector_size`` for any *word*."""
        return np.zeros(self.vector_size)

    def __getitem__(self, word):
        """Support ``model[word]`` by delegating to ``get_vector``."""
        return self.get_vector(word)

    def __contains__(self, word):
        """Report every word as known, as the earlier fallback class did.

        Without this method ``word in model`` falls back to calling
        ``__getitem__`` with 0, 1, 2, ..., which never raises ``IndexError``
        here and compares *word* against NumPy arrays.
        """
        return True

    def most_similar(self, word, topn=10):
        """Return an empty list: the dummy model has no neighbours."""
        return []
+# Load the model once at module level
+fasttext_model = load_fasttext_model()
 def question_vector_sentence(correct_answer):
     """Get sentence embedding using shared model"""
         # Get word embeddings
         embeddings = []
         for word in words:
+            if word in fasttext_model:
+                embeddings.append(fasttext_model[word])
         if not embeddings:
             return np.zeros(300)  # Return zero vector if no valid words