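"""Evaluate a causal language model's perplexity on a short fixed text."""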
import sys

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def evaluate_perplexity(model_name, revision="main", test_text=None):
    """
    Evaluate perplexity on a fixed piece of text.
    
    Args:
        model_name: Hugging Face model identifier
        revision: Model revision/commit hash
        test_text: Text to evaluate perplexity on (default if None)
    
    Returns:
        float: Perplexity score (lower is better)
    """
    
    try:
        sys.stderr.write(f"Loading model: {model_name} (revision: {revision})\n")
        sys.stderr.flush()
        
        # Default test text if none provided (kept as one flat string so
        # indentation whitespace is not scored as part of the text)
        if test_text is None:
            test_text = (
                "Artificial intelligence has transformed the way we live and work, "
                "bringing both opportunities and challenges. From autonomous vehicles "
                "to language models that can engage in human-like conversation, AI "
                "technologies are becoming increasingly sophisticated. However, with "
                "this advancement comes the responsibility to ensure these systems "
                "are developed and deployed ethically, with careful consideration "
                "for privacy, fairness, and transparency. The future of AI will "
                "likely depend on how well we balance innovation with these "
                "important social considerations."
            )
        
        sys.stderr.write("Loading tokenizer...\n")
        sys.stderr.flush()
        # Load tokenizer first
        tokenizer = AutoTokenizer.from_pretrained(model_name, revision=revision)
        sys.stderr.write("Tokenizer loaded successfully\n")
        sys.stderr.flush()
        
        sys.stderr.write("Loading model...\n")
        sys.stderr.flush()
        # Load model
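        # (device_map="auto" requires the accelerate package and places the
        # fp16 weights on available GPUs or CPU automatically)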
        model = AutoModelForCausalLM.from_pretrained(
            model_name, 
            revision=revision,
            torch_dtype=torch.float16,
            device_map="auto"
        )
        sys.stderr.write("Model loaded successfully\n")
        sys.stderr.flush()
        
        sys.stderr.write("Tokenizing input text...\n")
        sys.stderr.flush()
        # Tokenize the text
        inputs = tokenizer(test_text, return_tensors="pt")
        sys.stderr.write(f"Tokenized input shape: {inputs['input_ids'].shape}\n")
        sys.stderr.flush()
        
        # Move to same device as model
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        sys.stderr.write(f"Moved inputs to device: {model.device}\n")
        sys.stderr.flush()
        
        sys.stderr.write("Running forward pass...\n")
        sys.stderr.flush()
        # Calculate loss; with labels=input_ids the model shifts the labels
        # internally and returns the mean cross-entropy over the sequence
        with torch.no_grad():
            outputs = model(**inputs, labels=inputs["input_ids"])
            loss = outputs.loss
        
        sys.stderr.write(f"Calculated loss: {loss.item()}\n")
        sys.stderr.flush()
        
        # Perplexity = exp(mean negative log-likelihood per token); lower is better
        perplexity = torch.exp(loss).item()
        sys.stderr.write(f"Final perplexity: {perplexity}\n")
        sys.stderr.flush()
        
        return perplexity
        
    except Exception as e:
        import traceback
        sys.stderr.write(f"Error in evaluate_perplexity: {e}\n")
        sys.stderr.write(f"Traceback: {traceback.format_exc()}\n")
        sys.stderr.flush()
        raise

def create_perplexity_result(model_name, revision, precision, perplexity_score):
    """
    Create a result file in the expected format.
    """
    return {
        "config": {
            "model_dtype": f"torch.{precision}",
            "model_name": model_name,
            "model_sha": revision,
        },
        "results": {
            "perplexity": {
                "perplexity": perplexity_score,
            }
        }
    }
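
# Minimal usage sketch (not part of the original interface): "gpt2" and
# "results.json" below are hypothetical stand-ins for a real model id and
# output path.
if __name__ == "__main__":
    import json

    model_name = "gpt2"  # hypothetical example model id
    revision = "main"

    score = evaluate_perplexity(model_name, revision=revision)
    result = create_perplexity_result(model_name, revision, "float16", score)

    # Persist the result dictionary as JSON
    with open("results.json", "w") as f:
        json.dump(result, f, indent=2)
    print(f"Perplexity for {model_name}: {score:.2f}")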