"""
Test script to evaluate the fine-tuned model quality
"""

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def load_finetuned_model():
    """Load the fine-tuned model with LoRA adapters"""
    base_model_name = "HuggingFaceH4/zephyr-7b-beta"
    adapter_path = "models/lora_adapters"
    
    # Load tokenizer, falling back to EOS as the pad token if none is set
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    
    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto" if torch.cuda.is_available() else None,
        low_cpu_mem_usage=True
    )
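    # Note: device_map="auto" lets accelerate place layers across available
    # GPUs, and low_cpu_mem_usage avoids materialising a second full copy of
    # the weights in CPU RAM while the checkpoint loads.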
    
    # Load LoRA adapters
    model = PeftModel.from_pretrained(base_model, adapter_path)
    
    # Move to MPS if available
    if torch.backends.mps.is_available():
        model = model.to("mps")
    
    return model, tokenizer
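
# Note: for somewhat faster inference, the LoRA weights can be folded into the
# base model instead of keeping the adapter wrapper. A minimal sketch, using
# PEFT's merge_and_unload on the model loaded above:
#
#     model = PeftModel.from_pretrained(base_model, adapter_path)
#     model = model.merge_and_unload()  # merges LoRA deltas into base weights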

def generate_text(model, tokenizer, prompt, max_new_tokens=500):
    """Generate text using the fine-tuned model"""
    # Format as chat
    messages = [
        {"role": "system", "content": "You are Iain Morris, a witty British writer known for sharp observations about modern life, technology, and culture."},
        {"role": "user", "content": prompt}
    ]
    
    # Apply chat template
    formatted_prompt = tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
    )
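    # For Zephyr this renders (roughly) as:
    #   <|system|>\n{system}</s>\n<|user|>\n{prompt}</s>\n<|assistant|>\n
    # which is why the reply is extracted below by splitting on "<|assistant|>"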
    
    # Tokenize and move the inputs to the model's device (CUDA, MPS, or CPU)
    inputs = tokenizer(formatted_prompt, return_tensors="pt")
    device = next(model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    
    # Generate with sampling (temperature/top_p trade diversity for coherence)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )
    
    # Decode
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    # Extract just the assistant's reply
    if "<|assistant|>" in generated_text:
        response = generated_text.split("<|assistant|>")[-1].strip()
    else:
        # Fallback: decode only the tokens generated after the prompt
        prompt_length = inputs["input_ids"].shape[1]
        response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()
    
    return response
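

# A minimal sketch of a quantitative check to complement the samples printed
# by main(): perplexity is exp of the mean cross-entropy loss on held-out
# text, so lower is better. `eval_text` is a placeholder; substitute a
# passage from your own held-out data.
def compute_perplexity(model, tokenizer, eval_text):
    """Return exp(loss) of the model on eval_text (lower is better)."""
    inputs = tokenizer(eval_text, return_tensors="pt")
    device = next(model.parameters()).device
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs, labels=inputs["input_ids"])
    return torch.exp(outputs.loss).item()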

def main():
    """Test the fine-tuned model with sample prompts"""
    logger.info("Loading fine-tuned model...")
    
    try:
        model, tokenizer = load_finetuned_model()
        logger.info("Model loaded successfully!")
        
        # Test prompts
        test_prompts = [
            "Write about the absurdity of modern dating apps",
            "Describe a typical day working from home",
            "What's your take on social media influencers?",
            "Write about the experience of trying to be healthy in modern society"
        ]
        
        print("\n" + "="*60)
        print("FINE-TUNED MODEL OUTPUT SAMPLES")
        print("="*60)
        
        for i, prompt in enumerate(test_prompts, 1):
            print(f"\n--- Test {i}: {prompt} ---")
            try:
                response = generate_text(model, tokenizer, prompt)
                print(f"\nResponse:\n{response}")
                print("-" * 40)
            except Exception as e:
                print(f"Error generating response: {e}")
        
        print("\n" + "="*60)
        print("EVALUATION COMPLETE")
        print("="*60)
        
    except Exception as e:
        logger.error(f"Error loading model: {e}")
        print("\nModel testing failed. This might be because:")
        print("1. The model files weren't saved correctly")
        print("2. There's a compatibility issue")
        print("3. Insufficient memory")
        
        print(f"\nLoss of 1.988 is generally good for fine-tuning!")
        print("For comparison:")
        print("- Loss > 3.0: Poor quality, needs more training")
        print("- Loss 2.0-3.0: Decent quality, room for improvement") 
        print("- Loss 1.5-2.0: Good quality (your model is here!)")
        print("- Loss < 1.5: Very good, but watch for overfitting")

if __name__ == "__main__":
    main()