import logging
import os
import sys

import torch

# Make the project root importable so the shared model singleton can be found.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
from all_models import models

logger = logging.getLogger(__name__)

# The FLAN-T5 model (google/flan-t5-xl) and its tokenizer come from the
# all_models singleton rather than being loaded in this module.

# Pick a device and make sure the shared model lives on it.
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
    models.flan_model.to(device)
except Exception as e:
    logger.warning(f"Could not move model to device {device}: {e}")

def llm_score(correct_answers, answer):
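    """Score a student answer against one or more reference answers.

    For each reference answer, prompts the FLAN-T5 model for a similarity
    score between 0 and 1 and collects the parsed result. Returns a list of
    floats (one per reference answer), falling back to 0.5 whenever a score
    cannot be parsed or an error occurs.
    """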
    try:
        # Convert single answer to list if needed
        if isinstance(correct_answers, str):
            correct_answers = [correct_answers]
            
        score = []
        # Predefine tensor names so the cleanup below is safe even if the
        # loop never runs or fails before they are assigned.
        inputs = outputs = None
        # Get model instance
        model = models.get_flan_model()
        tokenizer = models.flan_tokenizer
        
        # Process each correct answer
        for correct_answer in correct_answers:
            try:
                # Prepare input
                input_text = f"Compare these answers and give a similarity score between 0 and 1:\nCorrect: {correct_answer}\nStudent: {answer}"
                inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True)
                inputs = {k: v.to(models.device) for k, v in inputs.items()}
                
                # Generate score
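                # Note: sampling (do_sample=True, temperature=0.7) makes the
                # returned score non-deterministic; setting do_sample=False
                # would give reproducible grading runs.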
                with torch.no_grad():  # Disable gradient calculation
                    outputs = model.generate(
                        **inputs,
                        max_length=50,
                        num_return_sequences=1,
                        temperature=0.7,
                        do_sample=True
                    )
                
                # Decode and extract score
                score_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
                try:
                    # Try to extract numeric score
                    score_value = float(score_text.split()[-1])
                    score.append(min(max(score_value, 0.0), 1.0))  # Clamp between 0 and 1
                except (ValueError, IndexError):
                    # If no numeric score found, use default
                    score.append(0.5)
                    
            except Exception as e:
                logger.error(f"Error processing answer: {str(e)}")
                score.append(0.5)  # Use default score on error
                
        # Clean up tensors if they were created
        if inputs is not None:
            del inputs
        if outputs is not None:
            del outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        
        return score
        
    except Exception as e:
        logger.error(f"Error in llm_score: {str(e)}")
        return [0.5]  # Return default score on error
    finally:
        # Release model reference
        models.release_flan_model()
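

if __name__ == "__main__":
    # Minimal usage sketch. Assumes the all_models singleton can load FLAN-T5
    # in this environment; the example answers below are illustrative only.
    references = ["Photosynthesis converts light energy into chemical energy."]
    student_answer = "Plants turn sunlight into chemical energy they can use."
    print(llm_score(references, student_answer))  # one score per reference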