#!/usr/bin/env python3
"""
Dwrko-M1.0 Testing Script
Test your fine-tuned Claude-like AI assistant
"""

import torch
import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import time
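
# NOTE: assumed dependencies (the source does not pin versions): torch,
# transformers, and peft, e.g. `pip install torch transformers peft`.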

def load_dwrko_model(model_path):
    """Load fine-tuned Dwrko-M1.0 model"""
    print(f"🤖 Loading Dwrko-M1.0 from {model_path}")

    # Load base tokenizer
    tokenizer = AutoTokenizer.from_pretrained("bigcode/starcoder2-3b")
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Load base model
    base_model = AutoModelForCausalLM.from_pretrained(
        "bigcode/starcoder2-3b",
        torch_dtype=torch.float16,
        device_map="auto"
    )

    # Load LoRA adapters
    model = PeftModel.from_pretrained(base_model, model_path)
    model = model.merge_and_unload()  # Merge adapters for faster inference

    print("✅ Dwrko-M1.0 loaded successfully!")
    return model, tokenizer
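
# Low-VRAM alternative (a sketch, assuming bitsandbytes is installed; the
# function name and approach are illustrative, not part of the original
# script): load the base model in 8-bit and keep the LoRA adapters unmerged,
# since merging into a quantized base is generally unsupported.
def load_dwrko_model_8bit(model_path):
    """Sketch: 8-bit variant of load_dwrko_model (adapters left unmerged)."""
    from transformers import BitsAndBytesConfig
    tokenizer = AutoTokenizer.from_pretrained("bigcode/starcoder2-3b")
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    base_model = AutoModelForCausalLM.from_pretrained(
        "bigcode/starcoder2-3b",
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto"
    )
    model = PeftModel.from_pretrained(base_model, model_path)  # no merge_and_unload()
    return model, tokenizer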

def generate_response(model, tokenizer, prompt, max_length=512, temperature=0.7):
    """Generate response from Dwrko-M1.0"""
    # Format prompt
    formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"

    # Tokenize
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

    # Generate (pass the attention mask along with the input ids to avoid
    # padding-related warnings and degraded outputs)
    start_time = time.time()
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=max_length,
            temperature=temperature,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            top_p=0.9,
            repetition_penalty=1.1
        )
    generation_time = time.time() - start_time

    # Decode response
    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = full_response.split("### Response:\n")[-1].strip()

    # Calculate tokens per second
    output_tokens = len(outputs[0]) - len(inputs.input_ids[0])
    tokens_per_second = output_tokens / generation_time if generation_time > 0 else 0

    return response, tokens_per_second
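
# Example usage (hypothetical prompt, assuming the model is already loaded):
#   reply, tps = generate_response(model, tokenizer, "Write a bubble sort in Python.")
#   print(reply, f"({tps:.1f} tokens/sec)")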

def run_test_suite(model, tokenizer):
    """Run comprehensive test suite for Dwrko-M1.0"""
    print("\n" + "=" * 60)
    print("🧪 Running Dwrko-M1.0 Test Suite")
    print("=" * 60)

    test_prompts = [
        # Coding Tests
        {
            "category": "💻 Coding",
            "prompt": "Write a Python function to calculate the factorial of a number using recursion.",
            "expected_keywords": ["def", "factorial", "return", "if", "else"]
        },
        {
            "category": "💻 Coding",
            "prompt": "How do you reverse a string in Python? Show me 3 different methods.",
            "expected_keywords": ["[::-1]", "reversed", "for", "range"]
        },
        {
            "category": "💻 Coding",
            "prompt": "Write a function to check if a number is prime.",
            "expected_keywords": ["def", "prime", "for", "range", "return"]
        },
        # Reasoning Tests
        {
            "category": "🧠 Reasoning",
            "prompt": "If a train travels 120 miles in 2 hours, what is its average speed?",
            "expected_keywords": ["60", "mph", "speed", "miles", "hour"]
        },
        {
            "category": "🧠 Reasoning",
            "prompt": "Solve this equation: 2x + 5 = 13. Show your work.",
            "expected_keywords": ["x", "4", "subtract", "divide", "2x"]
        },
        {
            "category": "🧠 Reasoning",
            "prompt": "What is the next number in this sequence: 2, 4, 8, 16, ?",
            "expected_keywords": ["32", "double", "multiply", "pattern"]
        },
        # Explanation Tests
        {
            "category": "📚 Explanation",
            "prompt": "Explain what machine learning is in simple terms.",
            "expected_keywords": ["algorithm", "data", "learn", "pattern", "computer"]
        },
        {
            "category": "📚 Explanation",
            "prompt": "What is the difference between a list and a tuple in Python?",
            "expected_keywords": ["mutable", "immutable", "[]", "()", "change"]
        }
    ]

    total_tests = len(test_prompts)
    passed_tests = 0
    total_tokens_per_second = 0

    for i, test in enumerate(test_prompts, 1):
        print(f"\n📝 Test {i}/{total_tests} - {test['category']}")
        print(f"❓ Prompt: {test['prompt']}")

        # Generate response
        response, tps = generate_response(model, tokenizer, test['prompt'])
        print(f"🤖 Dwrko-M1.0: {response[:200]}{'...' if len(response) > 200 else ''}")
        print(f"⚡ Speed: {tps:.1f} tokens/second")

        # Check if response contains expected keywords
        response_lower = response.lower()
        found_keywords = sum(1 for keyword in test['expected_keywords']
                             if keyword.lower() in response_lower)

        if found_keywords >= len(test['expected_keywords']) // 2:  # At least half of the keywords found
            print("✅ Test PASSED")
            passed_tests += 1
        else:
            print("❌ Test FAILED")
            print(f"   Expected keywords: {test['expected_keywords']}")

        total_tokens_per_second += tps
        print("-" * 60)

    # Final results
    print("\n📊 Test Results Summary:")
    print(f"✅ Passed: {passed_tests}/{total_tests} ({passed_tests/total_tests*100:.1f}%)")
    print(f"⚡ Average Speed: {total_tokens_per_second/total_tests:.1f} tokens/second")

    if passed_tests / total_tests >= 0.7:
        print("🎉 Dwrko-M1.0 is performing well!")
    else:
        print("⚠️ Consider additional training or parameter tuning")
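
# To extend the suite, add entries of the same shape to test_prompts above,
# e.g. (hypothetical):
#   {
#       "category": "💻 Coding",
#       "prompt": "Write a function that merges two sorted lists.",
#       "expected_keywords": ["def", "while", "append", "return"]
#   }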

def interactive_mode(model, tokenizer):
    """Interactive chat with Dwrko-M1.0"""
    print("\n" + "=" * 60)
    print("💬 Interactive Mode - Chat with Dwrko-M1.0")
    print("Type 'quit' to exit")
    print("=" * 60)

    while True:
        user_input = input("\n👤 You: ").strip()

        if user_input.lower() in ['quit', 'exit', 'q']:
            print("👋 Goodbye!")
            break

        if not user_input:
            continue

        print("🤖 Dwrko-M1.0: ", end="", flush=True)
        response, tps = generate_response(model, tokenizer, user_input, max_length=256)
        print(response)
        print(f"   ⚡ {tps:.1f} tokens/sec")
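
# Optional variant (a sketch, not wired into main()): stream tokens to stdout
# as they are generated, using transformers' TextStreamer, so interactive
# replies appear incrementally instead of after generation finishes.
def generate_streaming(model, tokenizer, prompt, max_new_tokens=256):
    """Sketch: stream a response token-by-token (same prompt format as above)."""
    from transformers import TextStreamer
    formatted_prompt = f"### Instruction:\n{prompt}\n\n### Response:\n"
    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    with torch.no_grad():
        model.generate(
            **inputs,
            streamer=streamer,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )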

def main():
    parser = argparse.ArgumentParser(description="Test Dwrko-M1.0 Model")
    parser.add_argument("--model_path", required=True, help="Path to fine-tuned Dwrko-M1.0")
    parser.add_argument("--test_suite", action="store_true", help="Run automated test suite")
    parser.add_argument("--interactive", action="store_true", help="Start interactive chat")
    parser.add_argument("--single_test", type=str, help="Test single prompt")

    args = parser.parse_args()

    # Load model
    model, tokenizer = load_dwrko_model(args.model_path)

    if args.test_suite:
        run_test_suite(model, tokenizer)

    if args.single_test:
        print(f"\n📝 Testing single prompt: {args.single_test}")
        response, tps = generate_response(model, tokenizer, args.single_test)
        print(f"🤖 Dwrko-M1.0: {response}")
        print(f"⚡ Speed: {tps:.1f} tokens/second")

    if args.interactive:
        interactive_mode(model, tokenizer)

    if not any([args.test_suite, args.interactive, args.single_test]):
        print("\n⚠️ Please specify --test_suite, --interactive, or --single_test")
        print("Example: python test_dwrko.py --model_path ./dwrko-m1.0 --test_suite")


if __name__ == "__main__":
    main()
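
# Example invocations (assuming the adapter directory ./dwrko-m1.0 exists):
#   python test_dwrko.py --model_path ./dwrko-m1.0 --test_suite
#   python test_dwrko.py --model_path ./dwrko-m1.0 --interactive
#   python test_dwrko.py --model_path ./dwrko-m1.0 --single_test "Explain recursion."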