Spaces:

yamanavijayavardhan
/

answer-grading-app

Running

App Files Files Community

answer-grading-app / similarity_check /llm_based_scoring /llm.py

yamanavijayavardhan

fix memory overlimit issue

8405423 2 months ago

raw

history blame contribute delete

2.86 kB

	import logging
	import math
	import os
	import sys

	import torch

	sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
	from all_models import models

	# Superseded: the model and tokenizer are now taken from the shared `models` singleton, so the direct loads below are disabled.
	# MODEL_NAME = "google/flan-t5-xl"
	# model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
	# tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

	# Choose the compute device (GPU when CUDA is usable, otherwise CPU) and make a
	# best-effort attempt to place the shared FLAN model on it at import time.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	try:
	    models.flan_model.to(device)
	except Exception as e:
	    # Placement is best-effort: report the problem and keep importing.
	    print(f"Warning: Could not move model to device {device}: {e}")

	# Module-level logger used by the error paths below.
	logger = logging.getLogger(__name__)

	# Score reported whenever the model reply cannot be used.
	_DEFAULT_SCORE = 0.5


	def _parse_score(score_text):
	    """Turn the model's decoded reply into a score clamped to [0.0, 1.0].

	    The last whitespace-separated token of ``score_text`` is read as a float.
	    Returns the default score when the reply is empty, the token is not
	    numeric, or it parses to NaN (NaN would otherwise pass straight through
	    the min/max clamp).
	    """
	    try:
	        score_value = float(score_text.split()[-1])
	    except (ValueError, IndexError):
	        return _DEFAULT_SCORE
	    if math.isnan(score_value):
	        return _DEFAULT_SCORE
	    return min(max(score_value, 0.0), 1.0)


	def llm_score(correct_answers, answer):
	    """Score a student answer against each reference answer using the FLAN model.

	    Args:
	        correct_answers: A single reference answer (str) or an iterable of
	            reference answers.
	        answer: The student's answer text.

	    Returns:
	        A list with one float in [0.0, 1.0] per reference answer. A reference
	        whose processing fails contributes 0.5. If the call fails as a whole
	        (for example the model cannot be obtained), ``[0.5]`` is returned.

	    The model reference obtained from ``models`` is always released on exit.
	    """
	    try:
	        # Accept a bare string as a one-element list of references.
	        if isinstance(correct_answers, str):
	            correct_answers = [correct_answers]

	        scores = []

	        model = models.get_flan_model()
	        tokenizer = models.flan_tokenizer

	        for correct_answer in correct_answers:
	            # Pre-bind so the cleanup in ``finally`` is safe even when
	            # tokenisation or generation fails before these are assigned.
	            inputs = None
	            outputs = None
	            try:
	                input_text = f"Compare these answers and give a similarity score between 0 and 1:\nCorrect: {correct_answer}\nStudent: {answer}"
	                inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
	                # NOTE(review): inputs go to `models.device`, while module
	                # import code moves the model to the local `device` -- confirm
	                # the two always agree.
	                inputs = {k: v.to(models.device) for k, v in inputs.items()}

	                # Inference only: no gradients are needed.
	                # NOTE(review): do_sample=True makes the reply (and thus the
	                # score) vary between calls -- confirm this is intended.
	                with torch.no_grad():
	                    outputs = model.generate(
	                        **inputs,
	                        max_length=50,
	                        num_return_sequences=1,
	                        temperature=0.7,
	                        do_sample=True,
	                    )

	                score_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
	                scores.append(_parse_score(score_text))
	            except Exception as e:
	                logger.error("Error processing answer: %s", e)
	                scores.append(_DEFAULT_SCORE)
	            finally:
	                # Drop tensor references before asking torch to free cached
	                # GPU memory; runs on both the success and the error path.
	                inputs = None
	                outputs = None
	                torch.cuda.empty_cache()

	        return scores

	    except Exception as e:
	        logger.error("Error in llm_score: %s", e)
	        return [_DEFAULT_SCORE]
	    finally:
	        # Release model reference
	        models.release_flan_model()