#!/usr/bin/env python3
"""
Dwrko-M1.0 Testing Script
Test your fine-tuned Claude-like AI assistant
"""

import torch
import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import time

# Base checkpoint the LoRA adapters were trained on. Used for both the
# tokenizer and the frozen base weights; kept in one place so the two
# from_pretrained() calls cannot drift apart.
BASE_MODEL = "bigcode/starcoder2-3b"


def load_dwrko_model(model_path):
    """Load the fine-tuned Dwrko-M1.0 model.

    Args:
        model_path: Directory containing the saved LoRA adapter weights.

    Returns:
        Tuple of ``(model, tokenizer)``. The LoRA adapters are merged into
        the base weights so inference runs without the PEFT wrapper.
    """
    print(f"๐Ÿค– Loading Dwrko-M1.0 from {model_path}")

    # Load base tokenizer; fall back to EOS as the pad token when the
    # checkpoint defines none (StarCoder2 does not ship a pad token).
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load base model in fp16, letting accelerate place it on available devices
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="auto"
    )

    # Load LoRA adapters and fold them into the base weights
    model = PeftModel.from_pretrained(base_model, model_path)
    model = model.merge_and_unload()  # Merge adapters for faster inference

    print("โœ… Dwrko-M1.0 loaded successfully!")
    return model, tokenizer


def generate_response(model, tokenizer, prompt, max_length=512, temperature=0.7):
    """Generate a single response from Dwrko-M1.0.

    Args:
        model: Merged causal-LM returned by :func:`load_dwrko_model`.
        tokenizer: Matching tokenizer.
        prompt: Raw user instruction (wrapped in the Alpaca-style template).
        max_length: Generation budget in NEW tokens (see note below).
        temperature: Sampling temperature.

    Returns:
        Tuple of ``(response_text, tokens_per_second)``.
    """
    # Format prompt with the instruction template used during fine-tuning
    formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"

    # Tokenize
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    # Generate
    start_time = time.time()
    with torch.no_grad():
        outputs = model.generate(
            inputs.input_ids,
            # Pass the attention mask explicitly: without it HF warns and may
            # mis-mask when pad == eos.
            attention_mask=inputs.attention_mask,
            # Bug fix: the original used max_length=, which counts the prompt
            # tokens too — a long prompt ate the whole budget and produced an
            # empty/truncated response. max_new_tokens budgets only the output.
            max_new_tokens=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            top_p=0.9,
            repetition_penalty=1.1
        )
    generation_time = time.time() - start_time

    # Decode response and strip everything up to the response marker
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = full_response.split("### Response:\n")[-1].strip()

    # Calculate tokens per second (guard against a zero-duration clock read)
    output_tokens = len(outputs[0]) - len(inputs.input_ids[0])
    tokens_per_second = output_tokens / generation_time if generation_time > 0 else 0

    return response, tokens_per_second


def run_test_suite(model, tokenizer):
    """Run the automated test suite for Dwrko-M1.0.

    Each test sends a prompt and passes when at least half of its expected
    keywords appear (case-insensitively) in the model's reply. Prints a
    per-test verdict and a final pass-rate / speed summary.
    """
    print("\n" + "="*60)
    print("๐Ÿงช Running Dwrko-M1.0 Test Suite")
    print("="*60)

    test_prompts = [
        # Coding Tests
        {
            "category": "๐Ÿ’ป Coding",
            "prompt": "Write a Python function to calculate the factorial of a number using recursion.",
            "expected_keywords": ["def", "factorial", "return", "if", "else"]
        },
        {
            "category": "๐Ÿ’ป Coding",
            "prompt": "How do you reverse a string in Python? Show me 3 different methods.",
            "expected_keywords": ["[::-1]", "reversed", "for", "range"]
        },
        {
            "category": "๐Ÿ’ป Coding",
            "prompt": "Write a function to check if a number is prime.",
            "expected_keywords": ["def", "prime", "for", "range", "return"]
        },

        # Reasoning Tests
        {
            "category": "๐Ÿง  Reasoning",
            "prompt": "If a train travels 120 miles in 2 hours, what is its average speed?",
            "expected_keywords": ["60", "mph", "speed", "miles", "hour"]
        },
        {
            "category": "๐Ÿง  Reasoning",
            "prompt": "Solve this equation: 2x + 5 = 13. Show your work.",
            "expected_keywords": ["x", "4", "subtract", "divide", "2x"]
        },
        {
            "category": "๐Ÿง  Reasoning",
            "prompt": "What is the next number in this sequence: 2, 4, 8, 16, ?",
            "expected_keywords": ["32", "double", "multiply", "pattern"]
        },

        # Explanation Tests
        {
            "category": "๐Ÿ“š Explanation",
            "prompt": "Explain what machine learning is in simple terms.",
            "expected_keywords": ["algorithm", "data", "learn", "pattern", "computer"]
        },
        {
            "category": "๐Ÿ“š Explanation",
            "prompt": "What is the difference between a list and a tuple in Python?",
            "expected_keywords": ["mutable", "immutable", "[]", "()", "change"]
        }
    ]

    total_tests = len(test_prompts)
    passed_tests = 0
    total_tokens_per_second = 0

    for i, test in enumerate(test_prompts, 1):
        print(f"\n๐Ÿ” Test {i}/{total_tests} - {test['category']}")
        print(f"โ“ Prompt: {test['prompt']}")

        # Generate response
        response, tps = generate_response(model, tokenizer, test['prompt'])

        print(f"๐Ÿค– Dwrko-M1.0: {response[:200]}{'...' if len(response) > 200 else ''}")
        print(f"โšก Speed: {tps:.1f} tokens/second")

        # Check if response contains expected keywords
        response_lower = response.lower()
        found_keywords = sum(1 for keyword in test['expected_keywords']
                             if keyword.lower() in response_lower)

        if found_keywords >= len(test['expected_keywords']) // 2:  # At least half keywords found
            print("โœ… Test PASSED")
            passed_tests += 1
        else:
            print("โŒ Test FAILED")
            print(f"   Expected keywords: {test['expected_keywords']}")

        total_tokens_per_second += tps
        print("-" * 60)

    # Final results
    print(f"\n๐Ÿ“Š Test Results Summary:")
    print(f"โœ… Passed: {passed_tests}/{total_tests} ({passed_tests/total_tests*100:.1f}%)")
    print(f"โšก Average Speed: {total_tokens_per_second/total_tests:.1f} tokens/second")

    if passed_tests/total_tests >= 0.7:
        print("๐ŸŽ‰ Dwrko-M1.0 is performing well!")
    else:
        print("โš ๏ธ Consider additional training or parameter tuning")


def interactive_mode(model, tokenizer):
    """Interactive chat loop with Dwrko-M1.0.

    Exits on 'quit'/'exit'/'q', and — bug fix — also exits cleanly on
    Ctrl-D (EOFError) or Ctrl-C (KeyboardInterrupt) instead of dumping a
    traceback from input().
    """
    print("\n" + "="*60)
    print("๐Ÿ’ฌ Interactive Mode - Chat with Dwrko-M1.0")
    print("Type 'quit' to exit")
    print("="*60)

    while True:
        try:
            user_input = input("\n๐Ÿ‘ค You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Terminal closed the stream or user hit Ctrl-C: leave gracefully
            print("\n๐Ÿ‘‹ Goodbye!")
            break

        if user_input.lower() in ['quit', 'exit', 'q']:
            print("๐Ÿ‘‹ Goodbye!")
            break

        if not user_input:
            continue

        print("๐Ÿค– Dwrko-M1.0: ", end="", flush=True)
        response, tps = generate_response(model, tokenizer, user_input, max_length=256)
        print(response)
        print(f"   โšก {tps:.1f} tokens/sec")


def main():
    """CLI entry point: parse args, load the model, dispatch to a mode."""
    parser = argparse.ArgumentParser(description="Test Dwrko-M1.0 Model")
    parser.add_argument("--model_path", required=True, help="Path to fine-tuned Dwrko-M1.0")
    parser.add_argument("--test_suite", action="store_true", help="Run automated test suite")
    parser.add_argument("--interactive", action="store_true", help="Start interactive chat")
    parser.add_argument("--single_test", type=str, help="Test single prompt")

    args = parser.parse_args()

    # Load model
    model, tokenizer = load_dwrko_model(args.model_path)

    if args.test_suite:
        run_test_suite(model, tokenizer)

    if args.single_test:
        print(f"\n๐Ÿ” Testing single prompt: {args.single_test}")
        response, tps = generate_response(model, tokenizer, args.single_test)
        print(f"๐Ÿค– Dwrko-M1.0: {response}")
        print(f"โšก Speed: {tps:.1f} tokens/second")

    if args.interactive:
        interactive_mode(model, tokenizer)

    if not any([args.test_suite, args.interactive, args.single_test]):
        print("\nโš ๏ธ Please specify --test_suite, --interactive, or --single_test")
        print("Example: python test_dwrko.py --model_path ./dwrko-m1.0 --test_suite")


if __name__ == "__main__":
    main()