Dwrko-M1.0 / test_dwrko.py
rajatsainisim's picture
🔧 Switch to StarCoder2-3B: Fix gated model access issue
74855c7
raw
history blame
7.81 kB
#!/usr/bin/env python3
"""
Dwrko-M1.0 Testing Script
Test your fine-tuned Claude-like AI assistant
"""
import torch
import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import time
def load_dwrko_model(model_path, base_model_name="bigcode/starcoder2-3b"):
    """Load the fine-tuned Dwrko-M1.0 model together with its tokenizer.

    The tokenizer and the base weights are both taken from
    ``base_model_name``; the LoRA adapters found at ``model_path`` are then
    applied to the base model and merged into it.

    Args:
        model_path: Location of the LoRA adapters, handed to
            ``PeftModel.from_pretrained``.
        base_model_name: Checkpoint id used for the tokenizer and the base
            model. Defaults to the checkpoint this script was written for.

    Returns:
        A ``(model, tokenizer)`` tuple, where ``model`` is the result of
        ``merge_and_unload()``.
    """
    print(f"πŸ€– Loading Dwrko-M1.0 from {model_path}")

    # Load base tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    if tokenizer.pad_token is None:
        # No dedicated padding token: reuse EOS so padding is always defined.
        tokenizer.pad_token = tokenizer.eos_token

    # Load base model (half precision, device placement left to the library)
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    # Load LoRA adapters
    model = PeftModel.from_pretrained(base_model, model_path)
    model = model.merge_and_unload()  # Merge adapters for faster inference

    print("βœ… Dwrko-M1.0 loaded successfully!")
    return model, tokenizer
def generate_response(model, tokenizer, prompt, max_length=512, temperature=0.7):
    """Generate a response from Dwrko-M1.0 for a single instruction.

    Args:
        model: Object exposing ``device`` and ``generate`` (a causal LM).
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        prompt: Raw instruction text; it is wrapped in the
            ``### Instruction`` / ``### Response`` template before encoding.
        max_length: Forwarded to ``generate`` as ``max_length``, so the
            limit covers the prompt tokens as well as the generated ones.
        temperature: Sampling temperature. A value ``<= 0`` switches to
            greedy decoding instead of sampling.

    Returns:
        A ``(response, tokens_per_second)`` tuple: the stripped text of the
        newly generated tokens and the generation throughput (``0`` when
        the elapsed time could not be measured).
    """
    # Format prompt
    formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"

    # Tokenize
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    prompt_length = len(inputs.input_ids[0])

    generation_kwargs = {
        # Pad and EOS share an id here, so pass the mask explicitly rather
        # than letting generate() guess it from the token ids.
        "attention_mask": getattr(inputs, "attention_mask", None),
        "max_length": max_length,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
        "repetition_penalty": 1.1,
    }
    if temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
    else:
        # Sampling needs a strictly positive temperature; decode greedily.
        generation_kwargs["do_sample"] = False

    # Generate (perf_counter is monotonic, unlike the wall clock)
    start_time = time.perf_counter()
    with torch.no_grad():
        outputs = model.generate(inputs.input_ids, **generation_kwargs)
    generation_time = time.perf_counter() - start_time

    # Decode only the tokens produced after the prompt. Splitting the full
    # text on the "### Response:" marker would drop part of the answer
    # whenever the marker shows up again in the prompt or in the output.
    generated_ids = outputs[0][prompt_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Calculate tokens per second
    output_tokens = len(generated_ids)
    tokens_per_second = output_tokens / generation_time if generation_time > 0 else 0
    return response, tokens_per_second
def run_test_suite(model, tokenizer):
    """Run the built-in Dwrko-M1.0 test suite and print a summary.

    Each test sends one prompt through ``generate_response`` and counts as
    passed when at least half of its expected keywords occur in the
    response (case-insensitive substring match). Progress, per-test speed
    and a final pass-rate / average-speed summary are printed to stdout.

    Args:
        model: Model forwarded to ``generate_response``.
        tokenizer: Tokenizer forwarded to ``generate_response``.
    """
    print("\n" + "="*60)
    print("πŸ§ͺ Running Dwrko-M1.0 Test Suite")
    print("="*60)

    test_prompts = [
        # Coding Tests
        {
            "category": "πŸ’» Coding",
            "prompt": "Write a Python function to calculate the factorial of a number using recursion.",
            "expected_keywords": ["def", "factorial", "return", "if", "else"]
        },
        {
            "category": "πŸ’» Coding",
            "prompt": "How do you reverse a string in Python? Show me 3 different methods.",
            "expected_keywords": ["[::-1]", "reversed", "for", "range"]
        },
        {
            "category": "πŸ’» Coding",
            "prompt": "Write a function to check if a number is prime.",
            "expected_keywords": ["def", "prime", "for", "range", "return"]
        },
        # Reasoning Tests
        {
            "category": "🧠 Reasoning",
            "prompt": "If a train travels 120 miles in 2 hours, what is its average speed?",
            "expected_keywords": ["60", "mph", "speed", "miles", "hour"]
        },
        {
            "category": "🧠 Reasoning",
            "prompt": "Solve this equation: 2x + 5 = 13. Show your work.",
            "expected_keywords": ["x", "4", "subtract", "divide", "2x"]
        },
        {
            "category": "🧠 Reasoning",
            "prompt": "What is the next number in this sequence: 2, 4, 8, 16, ?",
            "expected_keywords": ["32", "double", "multiply", "pattern"]
        },
        # Explanation Tests
        {
            "category": "πŸ“š Explanation",
            "prompt": "Explain what machine learning is in simple terms.",
            "expected_keywords": ["algorithm", "data", "learn", "pattern", "computer"]
        },
        {
            "category": "πŸ“š Explanation",
            "prompt": "What is the difference between a list and a tuple in Python?",
            "expected_keywords": ["mutable", "immutable", "[]", "()", "change"]
        }
    ]

    total_tests = len(test_prompts)
    passed_tests = 0
    total_tokens_per_second = 0

    for i, test in enumerate(test_prompts, 1):
        print(f"\nπŸ” Test {i}/{total_tests} - {test['category']}")
        print(f"❓ Prompt: {test['prompt']}")

        # Generate response
        response, tps = generate_response(model, tokenizer, test['prompt'])
        # Only the first 200 characters are echoed to keep the log readable.
        print(f"πŸ€– Dwrko-M1.0: {response[:200]}{'...' if len(response) > 200 else ''}")
        print(f"⚑ Speed: {tps:.1f} tokens/second")

        # Check if response contains expected keywords
        expected_keywords = test['expected_keywords']
        response_lower = response.lower()
        found_keywords = sum(
            1 for keyword in expected_keywords if keyword.lower() in response_lower
        )

        # At least half of the keywords must be found. Comparing doubled
        # counts avoids the round-down of ``len(...) // 2``, which let
        # 2 out of 5 keywords count as a pass.
        if 2 * found_keywords >= len(expected_keywords):
            print("βœ… Test PASSED")
            passed_tests += 1
        else:
            print("❌ Test FAILED")
            print(f" Expected keywords: {expected_keywords}")

        total_tokens_per_second += tps
        print("-" * 60)

    # Final results
    print(f"\nπŸ“Š Test Results Summary:")
    print(f"βœ… Passed: {passed_tests}/{total_tests} ({passed_tests/total_tests*100:.1f}%)")
    print(f"⚑ Average Speed: {total_tokens_per_second/total_tests:.1f} tokens/second")

    # 70% pass rate is the bar for calling the model healthy.
    if passed_tests/total_tests >= 0.7:
        print("πŸŽ‰ Dwrko-M1.0 is performing well!")
    else:
        print("⚠️ Consider additional training or parameter tuning")
def interactive_mode(model, tokenizer):
    """Run an interactive chat loop with Dwrko-M1.0 on stdin/stdout.

    Reads one line at a time, skips blank input, and prints the model's
    reply together with its generation speed. The loop ends when the user
    types ``quit``, ``exit`` or ``q`` (case-insensitive), or when input
    ends (end-of-file) or is interrupted from the keyboard.

    Args:
        model: Model forwarded to ``generate_response``.
        tokenizer: Tokenizer forwarded to ``generate_response``.
    """
    print("\n" + "="*60)
    print("πŸ’¬ Interactive Mode - Chat with Dwrko-M1.0")
    print("Type 'quit' to exit")
    print("="*60)

    while True:
        try:
            user_input = input("\nπŸ‘€ You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D, Ctrl-C or a closed/piped stdin: leave the chat cleanly
            # instead of crashing with a traceback.
            print()
            print("πŸ‘‹ Goodbye!")
            break

        if user_input.lower() in ['quit', 'exit', 'q']:
            print("πŸ‘‹ Goodbye!")
            break
        if not user_input:
            continue

        # Print the speaker label first so it is visible while generating.
        print("πŸ€– Dwrko-M1.0: ", end="", flush=True)
        response, tps = generate_response(model, tokenizer, user_input, max_length=256)
        print(response)
        print(f" ⚑ {tps:.1f} tokens/sec")
def main():
    """Parse the command line and run the requested Dwrko-M1.0 test modes.

    ``--model_path`` is required. Any combination of ``--test_suite``,
    ``--single_test PROMPT`` and ``--interactive`` may be given; they run in
    that order. When none is given, a usage hint is printed and the model
    is not loaded.
    """
    parser = argparse.ArgumentParser(description="Test Dwrko-M1.0 Model")
    parser.add_argument("--model_path", required=True, help="Path to fine-tuned Dwrko-M1.0")
    parser.add_argument("--test_suite", action="store_true", help="Run automated test suite")
    parser.add_argument("--interactive", action="store_true", help="Start interactive chat")
    parser.add_argument("--single_test", type=str, help="Test single prompt")
    args = parser.parse_args()

    # Check for a requested mode before the expensive model load, so a
    # forgotten flag does not cost a full load just to print the hint.
    if not any([args.test_suite, args.interactive, args.single_test]):
        print("\n⚠️ Please specify --test_suite, --interactive, or --single_test")
        print("Example: python test_dwrko.py --model_path ./dwrko-m1.0 --test_suite")
        return

    # Load model
    model, tokenizer = load_dwrko_model(args.model_path)

    if args.test_suite:
        run_test_suite(model, tokenizer)

    if args.single_test:
        print(f"\nπŸ” Testing single prompt: {args.single_test}")
        response, tps = generate_response(model, tokenizer, args.single_test)
        print(f"πŸ€– Dwrko-M1.0: {response}")
        print(f"⚑ Speed: {tps:.1f} tokens/second")

    if args.interactive:
        interactive_mode(model, tokenizer)


if __name__ == "__main__":
    main()