Spaces:
Running
Running
File size: 7,810 Bytes
97381e8 74855c7 97381e8 74855c7 97381e8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
#!/usr/bin/env python3
"""
Dwrko-M1.0 Testing Script
Test your fine-tuned Claude-like AI assistant
"""
import torch
import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import time
def load_dwrko_model(model_path, base_model_name="bigcode/starcoder2-3b"):
    """Load the fine-tuned Dwrko-M1.0 model together with its tokenizer.

    The base tokenizer and base causal-LM weights are loaded first, then the
    LoRA adapters found at ``model_path`` are applied and merged into the
    base weights.

    Args:
        model_path: Location of the fine-tuned LoRA adapter weights.
        base_model_name: Checkpoint the adapters are applied on top of.
            Defaults to the StarCoder2-3B checkpoint this script has always
            used, so existing callers are unaffected.

    Returns:
        A ``(model, tokenizer)`` tuple, where ``model`` is the object
        returned by ``merge_and_unload()``.
    """
    print(f"🤖 Loading Dwrko-M1.0 from {model_path}")

    # Load base tokenizer
    tokenizer = AutoTokenizer.from_pretrained(base_model_name)
    if tokenizer.pad_token is None:
        # No dedicated pad token configured: reuse EOS for padding.
        tokenizer.pad_token = tokenizer.eos_token

    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    # Load LoRA adapters
    model = PeftModel.from_pretrained(base_model, model_path)
    model = model.merge_and_unload()  # Merge adapters for faster inference

    print("✅ Dwrko-M1.0 loaded successfully!")
    return model, tokenizer
def generate_response(model, tokenizer, prompt, max_length=512, temperature=0.7):
    """Generate a response from Dwrko-M1.0 for a single prompt.

    The prompt is wrapped in the ``### Instruction:`` / ``### Response:``
    template, tokenized, and passed to ``model.generate`` with sampling
    enabled.

    Args:
        model: Object exposing ``device`` and ``generate(...)``.
        tokenizer: Callable tokenizer exposing ``decode`` and ``eos_token_id``.
        prompt: The user instruction to answer.
        max_length: Forwarded to ``generate`` as ``max_length``. Note that in
            Hugging Face ``generate`` this limit counts the prompt tokens too.
        temperature: Sampling temperature forwarded to ``generate``.

    Returns:
        A ``(response, tokens_per_second)`` tuple: the stripped text of the
        newly generated tokens and the generation throughput (0 when the
        elapsed time is not measurable).
    """
    # Format prompt
    formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"

    # Tokenize
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    prompt_token_count = len(inputs.input_ids[0])

    # Generate
    start_time = time.perf_counter()  # monotonic clock: safe for intervals
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            # pad_token_id equals eos_token_id below, so the mask cannot be
            # inferred from the ids; pass the tokenizer's mask explicitly.
            attention_mask=getattr(inputs, "attention_mask", None),
            max_length=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            top_p=0.9,
            repetition_penalty=1.1,
        )
    generation_time = time.perf_counter() - start_time

    # Decode only the tokens produced after the prompt. Splitting the full
    # decoded text on the "### Response:" marker would drop the real answer
    # whenever the model emits that marker itself.
    generated_ids = outputs[0][prompt_token_count:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # Calculate tokens per second
    output_tokens = len(generated_ids)
    tokens_per_second = output_tokens / generation_time if generation_time > 0 else 0
    return response, tokens_per_second
def run_test_suite(model, tokenizer):
    """Run the built-in prompt suite against Dwrko-M1.0 and print a report.

    Each prompt is answered via ``generate_response``; a test passes when at
    least half of its expected keywords occur (case-insensitively, as
    substrings) in the response. A summary with the pass rate and the mean
    generation speed is printed at the end.

    Args:
        model: Model forwarded to ``generate_response``.
        tokenizer: Tokenizer forwarded to ``generate_response``.

    Returns:
        None. All results are printed to stdout.
    """
    print("\n" + "=" * 60)
    print("🧪 Running Dwrko-M1.0 Test Suite")
    print("=" * 60)

    test_prompts = [
        # Coding Tests
        {
            "category": "💻 Coding",
            "prompt": "Write a Python function to calculate the factorial of a number using recursion.",
            "expected_keywords": ["def", "factorial", "return", "if", "else"],
        },
        {
            "category": "💻 Coding",
            "prompt": "How do you reverse a string in Python? Show me 3 different methods.",
            "expected_keywords": ["[::-1]", "reversed", "for", "range"],
        },
        {
            "category": "💻 Coding",
            "prompt": "Write a function to check if a number is prime.",
            "expected_keywords": ["def", "prime", "for", "range", "return"],
        },
        # Reasoning Tests
        {
            "category": "🧠 Reasoning",
            "prompt": "If a train travels 120 miles in 2 hours, what is its average speed?",
            "expected_keywords": ["60", "mph", "speed", "miles", "hour"],
        },
        {
            "category": "🧠 Reasoning",
            "prompt": "Solve this equation: 2x + 5 = 13. Show your work.",
            "expected_keywords": ["x", "4", "subtract", "divide", "2x"],
        },
        {
            "category": "🧠 Reasoning",
            "prompt": "What is the next number in this sequence: 2, 4, 8, 16, ?",
            "expected_keywords": ["32", "double", "multiply", "pattern"],
        },
        # Explanation Tests
        {
            "category": "📚 Explanation",
            "prompt": "Explain what machine learning is in simple terms.",
            "expected_keywords": ["algorithm", "data", "learn", "pattern", "computer"],
        },
        {
            "category": "📚 Explanation",
            "prompt": "What is the difference between a list and a tuple in Python?",
            "expected_keywords": ["mutable", "immutable", "[]", "()", "change"],
        },
    ]

    total_tests = len(test_prompts)
    passed_tests = 0
    total_tokens_per_second = 0

    for i, test in enumerate(test_prompts, 1):
        print(f"\n📝 Test {i}/{total_tests} - {test['category']}")
        print(f"❓ Prompt: {test['prompt']}")

        # Generate response
        response, tps = generate_response(model, tokenizer, test['prompt'])
        print(f"🤖 Dwrko-M1.0: {response[:200]}{'...' if len(response) > 200 else ''}")
        print(f"⚡ Speed: {tps:.1f} tokens/second")

        # Check if response contains expected keywords
        response_lower = response.lower()
        keywords = test['expected_keywords']
        found_keywords = sum(
            1 for keyword in keywords if keyword.lower() in response_lower
        )

        # "At least half": compare doubled count so odd-sized keyword lists
        # round up (3 of 5) instead of down (2 of 5, as floor division did).
        if found_keywords * 2 >= len(keywords):
            print("✅ Test PASSED")
            passed_tests += 1
        else:
            print("❌ Test FAILED")
            print(f" Expected keywords: {keywords}")

        total_tokens_per_second += tps
        print("-" * 60)

    # Final results
    print("\n📊 Test Results Summary:")
    print(f"✅ Passed: {passed_tests}/{total_tests} ({passed_tests/total_tests*100:.1f}%)")
    print(f"⚡ Average Speed: {total_tokens_per_second/total_tests:.1f} tokens/second")

    if passed_tests / total_tests >= 0.7:
        print("🎉 Dwrko-M1.0 is performing well!")
    else:
        print("⚠️ Consider additional training or parameter tuning")
def interactive_mode(model, tokenizer):
    """Run an interactive chat loop with Dwrko-M1.0 on stdin/stdout.

    Reads one line at a time, skips blank input, and prints the reply from
    ``generate_response`` (called with ``max_length=256``) along with its
    speed. The loop ends when the user types ``quit``, ``exit`` or ``q``, or
    when input is closed or interrupted (Ctrl-D / Ctrl-C).

    Args:
        model: Model forwarded to ``generate_response``.
        tokenizer: Tokenizer forwarded to ``generate_response``.

    Returns:
        None.
    """
    print("\n" + "=" * 60)
    print("💬 Interactive Mode - Chat with Dwrko-M1.0")
    print("Type 'quit' to exit")
    print("=" * 60)

    while True:
        try:
            user_input = input("\n👤 You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C at the prompt: leave the chat cleanly
            # instead of surfacing a traceback.
            print("\n👋 Goodbye!")
            break

        if user_input.lower() in ['quit', 'exit', 'q']:
            print("👋 Goodbye!")
            break
        if not user_input:
            continue

        print("🤖 Dwrko-M1.0: ", end="", flush=True)
        response, tps = generate_response(model, tokenizer, user_input, max_length=256)
        print(response)
        print(f" ⚡ {tps:.1f} tokens/sec")
def main():
    """Parse command-line arguments and run the requested test modes.

    ``--model_path`` is required. Any combination of ``--test_suite``,
    ``--single_test PROMPT`` and ``--interactive`` may be given; they run in
    that order. When none is given, a usage hint is printed and the model is
    not loaded.
    """
    parser = argparse.ArgumentParser(description="Test Dwrko-M1.0 Model")
    parser.add_argument("--model_path", required=True, help="Path to fine-tuned Dwrko-M1.0")
    parser.add_argument("--test_suite", action="store_true", help="Run automated test suite")
    parser.add_argument("--interactive", action="store_true", help="Start interactive chat")
    parser.add_argument("--single_test", type=str, help="Test single prompt")
    args = parser.parse_args()

    # Validate the requested modes before the expensive model load, so a
    # run with no mode flag returns immediately with the hint.
    if not any([args.test_suite, args.interactive, args.single_test]):
        print("\n⚠️ Please specify --test_suite, --interactive, or --single_test")
        print("Example: python test_dwrko.py --model_path ./dwrko-m1.0 --test_suite")
        return

    # Load model
    model, tokenizer = load_dwrko_model(args.model_path)

    if args.test_suite:
        run_test_suite(model, tokenizer)

    if args.single_test:
        print(f"\n🔍 Testing single prompt: {args.single_test}")
        response, tps = generate_response(model, tokenizer, args.single_test)
        print(f"🤖 Dwrko-M1.0: {response}")
        print(f"⚡ Speed: {tps:.1f} tokens/second")

    if args.interactive:
        interactive_mode(model, tokenizer)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()