# yamanavijayavardhan's picture
# fix memory overlimit issue
# 8405423
import torch
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from all_models import models
# Obsolete: the model and tokenizer now come from the shared `models` singleton,
# so the direct loading below is disabled and can be deleted.
# MODEL_NAME = "google/flan-t5-xl"
# model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Choose the torch device: CUDA when torch reports it as available, else CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Best-effort move of the shared FLAN model onto that device at import time.
# A failure of the move is printed as a warning instead of propagating.
try:
    models.flan_model.to(device)
except Exception as e:
    print(f"Warning: Could not move model to device {device}: {e}")
def _parse_score(score_text, default=0.5):
    """Convert the model's decoded reply into a score clamped to [0.0, 1.0].

    Only the last whitespace-separated token of ``score_text`` is examined.
    It is first parsed verbatim; if that fails, it is parsed again with
    trailing sentence punctuation and wrapping quotes/brackets removed, so a
    reply ending in ``"0.8."`` still yields ``0.8``.

    Args:
        score_text: Text decoded from the model output.
        default: Value returned when no usable number is found.

    Returns:
        The parsed value clamped to ``[0.0, 1.0]``, or ``default`` when the
        text is empty, the last token is not numeric, or it parses to NaN.
    """
    import math

    tokens = score_text.split()
    if not tokens:
        return default

    last = tokens[-1]
    cleaned = last.rstrip(".,;:!?").strip("\"'()[]")
    for candidate in (last, cleaned):
        try:
            value = float(candidate)
        except ValueError:
            continue
        # NaN compares false against everything, so min/max clamping would
        # let it through unchanged; treat it as "no score".
        if math.isnan(value):
            return default
        return min(max(value, 0.0), 1.0)
    return default


def llm_score(correct_answers, answer):
    """Score a student answer against each reference answer with the FLAN model.

    For every reference answer a comparison prompt is tokenized, passed to
    ``model.generate`` and the decoded reply is parsed into a number.

    Args:
        correct_answers: A single reference answer (``str``) or an iterable of
            reference answers. A bare string is wrapped into a one-item list.
        answer: The student answer to compare against each reference.

    Returns:
        A list with one float in ``[0.0, 1.0]`` per reference answer, in input
        order. A reference whose processing fails, or whose reply contains no
        usable number, contributes the neutral default ``0.5``. An empty
        ``correct_answers`` yields ``[]``. If the call fails outside the
        per-answer handling (e.g. the model cannot be obtained), ``[0.5]`` is
        returned.

    Notes:
        ``models.release_flan_model()`` is always called on exit, including
        when ``models.get_flan_model()`` itself raised.
    """
    # Local import: the module used a `logger` name it never defined, which
    # turned every handled error into a NameError.
    import logging

    log = logging.getLogger(__name__)
    default_score = 0.5

    try:
        # Convert single answer to list if needed
        if isinstance(correct_answers, str):
            correct_answers = [correct_answers]

        score = []

        # Get model instance
        model = models.get_flan_model()
        tokenizer = models.flan_tokenizer

        for correct_answer in correct_answers:
            # Pre-bind so the cleanup below is safe even when tokenization or
            # generation fails before these names are assigned.
            inputs = outputs = None
            try:
                input_text = f"Compare these answers and give a similarity score between 0 and 1:\nCorrect: {correct_answer}\nStudent: {answer}"
                inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
                inputs = {k: v.to(models.device) for k, v in inputs.items()}

                # NOTE(review): do_sample=True draws tokens stochastically, so
                # repeated calls can return different scores for the same
                # pair of answers. Kept as in the original.
                with torch.no_grad():  # Disable gradient calculation
                    outputs = model.generate(
                        **inputs,
                        max_length=50,
                        num_return_sequences=1,
                        temperature=0.7,
                        do_sample=True,
                    )

                score_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
                score.append(_parse_score(score_text, default_score))
            except Exception as e:
                log.error("Error processing answer: %s", e)
                score.append(default_score)  # Use default score on error
            finally:
                # Drop this iteration's tensor references before the next one.
                del inputs, outputs

        torch.cuda.empty_cache()
        return score
    except Exception as e:
        log.error("Error in llm_score: %s", e)
        return [default_score]  # Return default score on error
    finally:
        # Release model reference
        models.release_flan_model()