Spaces:
Sleeping
Sleeping
File size: 4,300 Bytes
599c2c0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
"""
Test script to evaluate the fine-tuned model quality
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def load_finetuned_model():
"""Load the fine-tuned model with LoRA adapters"""
base_model_name = "HuggingFaceH4/zephyr-7b-beta"
adapter_path = "models/lora_adapters"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
base_model_name,
torch_dtype=torch.float16,
device_map="auto" if torch.cuda.is_available() else None,
low_cpu_mem_usage=True
)
# Load LoRA adapters
model = PeftModel.from_pretrained(base_model, adapter_path)
# Move to MPS if available
if torch.backends.mps.is_available():
model = model.to("mps")
return model, tokenizer
def generate_text(model, tokenizer, prompt, max_length=500):
"""Generate text using the fine-tuned model"""
# Format as chat
messages = [
{"role": "system", "content": "You are Iain Morris, a witty British writer known for sharp observations about modern life, technology, and culture."},
{"role": "user", "content": prompt}
]
# Apply chat template
formatted_prompt = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
# Tokenize
inputs = tokenizer(formatted_prompt, return_tensors="pt")
if torch.backends.mps.is_available():
inputs = {k: v.to("mps") for k, v in inputs.items()}
# Generate
with torch.no_grad():
outputs = model.generate(
**inputs,
max_new_tokens=max_length,
do_sample=True,
temperature=0.7,
top_p=0.9,
pad_token_id=tokenizer.eos_token_id
)
# Decode
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Extract just the assistant's response
if "<|assistant|>" in generated_text:
response = generated_text.split("<|assistant|>")[-1].strip()
else:
response = generated_text[len(formatted_prompt):].strip()
return response
def main():
"""Test the fine-tuned model with sample prompts"""
logger.info("Loading fine-tuned model...")
try:
model, tokenizer = load_finetuned_model()
logger.info("Model loaded successfully!")
# Test prompts
test_prompts = [
"Write about the absurdity of modern dating apps",
"Describe a typical day working from home",
"What's your take on social media influencers?",
"Write about the experience of trying to be healthy in modern society"
]
print("\n" + "="*60)
print("FINE-TUNED MODEL OUTPUT SAMPLES")
print("="*60)
for i, prompt in enumerate(test_prompts, 1):
print(f"\n--- Test {i}: {prompt} ---")
try:
response = generate_text(model, tokenizer, prompt)
print(f"\nResponse:\n{response}")
print("-" * 40)
except Exception as e:
print(f"Error generating response: {e}")
print("\n" + "="*60)
print("EVALUATION COMPLETE")
print("="*60)
except Exception as e:
logger.error(f"Error loading model: {e}")
print("\nModel testing failed. This might be because:")
print("1. The model files weren't saved correctly")
print("2. There's a compatibility issue")
print("3. Insufficient memory")
print(f"\nLoss of 1.988 is generally good for fine-tuning!")
print("For comparison:")
print("- Loss > 3.0: Poor quality, needs more training")
print("- Loss 2.0-3.0: Decent quality, room for improvement")
print("- Loss 1.5-2.0: Good quality (your model is here!)")
print("- Loss < 1.5: Very good, but watch for overfitting")
if __name__ == "__main__":
main()
|