"""Smoke-test and latency-benchmark script for the Turkish Medical Model API.

Targets the L4-optimized Hugging Face Space deployment: checks health/debug
endpoints, runs a multi-turn medical conversation, and benchmarks /chat
generation time over a handful of Turkish symptom prompts.
"""

import json
import time

import requests

# Base URL of the deployed Hugging Face Space hosting the API.
BASE_URL = "https://conquerorr000-turkish-medical-model-api.hf.space"


def test_endpoint(endpoint, method="GET", data=None):
    """Call one API endpoint and pretty-print status, latency, and body.

    Args:
        endpoint: Path starting with "/" that is appended to BASE_URL.
        method: "GET" or "POST"; POST sends *data* as a JSON body.
        data: Optional JSON-serializable payload for POST requests.

    Returns:
        The decoded JSON response on HTTP 200, otherwise None (also on
        transport errors, which are caught and reported).
    """
    url = f"{BASE_URL}{endpoint}"
    try:
        start_time = time.time()
        # POSTs get a longer timeout because model generation can be slow.
        if method == "POST":
            response = requests.post(url, json=data, timeout=60)
        else:
            response = requests.get(url, timeout=30)
        response_time = time.time() - start_time

        print(f"\n{'='*50}")
        print(f" {method} {endpoint}")
        print(f" Status Code: {response.status_code}")
        print(f"⏱️ Response Time: {response_time:.2f}s")

        if response.status_code == 200:
            result = response.json()
            print(f" Response:")
            print(json.dumps(result, indent=2, ensure_ascii=False))
            return result
        else:
            print(f"❌ Error: {response.status_code}")
            print(f"📋 Response: {response.text}")
            return None
    except Exception as e:
        # Best-effort smoke test: report the failure and keep going.
        print(f" Error testing {endpoint}: {e}")
        return None


def test_medical_conversation():
    """Test a full medical conversation via the /conversation endpoint.

    Sends a scripted three-turn Turkish patient/doctor exchange and returns
    the endpoint's JSON response (or None on failure).
    """
    print(f"\n{'='*50}")
    print(" MEDICAL CONVERSATION TEST")
    print(f"{'='*50}")

    # Conversation messages
    conversation_data = {
        "messages": [
            {"role": "user", "content": "Merhaba doktor, 3 gündür ateşim var."},
            {"role": "assistant", "content": "Merhaba. Ateşinizin kaç derece olduğunu ölçtünüz mü?"},
            {"role": "user", "content": "Evet, 38.5 derece civarında. Ayrıca boğazım da ağrıyor."},
        ],
        "max_tokens": 150,
        "temperature": 0.7,
    }
    return test_endpoint("/conversation", "POST", conversation_data)


def performance_benchmark():
    """Run a performance benchmark over several /chat prompts.

    Sends five Turkish symptom descriptions, accumulates the server-reported
    "generation_time" for successful responses, and prints average latency
    plus a coarse performance rating.
    """
    print(f"\n{'='*50}")
    print(" PERFORMANCE BENCHMARK")
    print(f"{'='*50}")

    test_cases = [
        "Başım ağrıyor ve mide bulantım var.",
        "2 haftadır öksürüyorum ve balgamım var.",
        "Dizlerimde ağrı var, özellikle sabahları.",
        "Uykusuzluk problemi yaşıyorum, ne yapmalıyım?",
        "Alerjik reaksiyonum olabilir, derimde kaşıntı var.",
    ]

    total_time = 0
    successful_tests = 0

    for i, test_case in enumerate(test_cases, 1):
        print(f"\n Test Case {i}: {test_case}")
        chat_data = {
            "message": test_case,
            "max_tokens": 120,
            "temperature": 0.7,
        }
        result = test_endpoint("/chat", "POST", chat_data)
        if result and result.get("response"):
            generation_time = result.get("generation_time", 0)
            total_time += generation_time
            successful_tests += 1
            print(f" Generation Time: {generation_time:.2f}s")
            # Heuristic quality check: a non-trivial reply that doesn't echo
            # a "Hasta" (patient) role prefix back into the answer.
            print(f" Response Quality: {'Clean' if len(result['response']) > 20 and 'Hasta' not in result['response'] else 'Needs Improvement'}")
        else:
            print(" Test failed")

    # Only report averages when at least one request succeeded (avoids /0).
    if successful_tests > 0:
        avg_time = total_time / successful_tests
        print(f"\n BENCHMARK RESULTS:")
        print(f" Successful Tests: {successful_tests}/{len(test_cases)}")
        print(f" Average Generation Time: {avg_time:.2f}s")
        print(f" Performance Rating: {'Excellent' if avg_time < 5 else 'Good' if avg_time < 7 else 'Needs Optimization'}")


def main():
    """Main test execution: health check, then the full endpoint suite."""
    print(" Turkish Medical Model API - L4 Performance Test")
    print(" GPU: Nvidia L4 24GB VRAM")
    print(" Mode: FP16 Full Precision + Flash Attention")

    # 1. Health Check — gate everything else on the model being loaded.
    health_result = test_endpoint("/health")

    if health_result and health_result.get("model_loaded"):
        print("\n Model is loaded and ready!")
        # 2. Memory Status
        test_endpoint("/memory-status")
        # 3. Debug Info
        test_endpoint("/debug")
        # 4. Quick Test
        test_endpoint("/test")
        # 5. Medical Conversation Test
        test_medical_conversation()
        # 6. Performance Benchmark
        performance_benchmark()
    else:
        print("\n Model not ready, skipping performance tests")
        print("ℹ Wait for model to load and try again")

    print(f"\n{'='*50}")
    print(" L4 Performance Test Completed!")


# Guard so importing this module does not fire network requests; the
# original ran the suite at import time as a side effect.
if __name__ == "__main__":
    main()