File size: 4,386 Bytes
dc4fe0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 |
import requests
import json
import time
# Test Turkish Medical Model API - L4 Optimized
BASE_URL = "https://conquerorr000-turkish-medical-model-api.hf.space"
def test_endpoint(endpoint, method="GET", data=None):
    """Call one API endpoint and pretty-print status, timing, and body.

    Args:
        endpoint: Path appended to BASE_URL (e.g. "/health").
        method: "POST" sends *data* as JSON; any other value does a GET.
        data: JSON-serializable payload for POST requests.

    Returns:
        The decoded JSON body on HTTP 200, otherwise None.
    """
    url = f"{BASE_URL}{endpoint}"
    try:
        start_time = time.time()
        if method == "POST":
            # Generation endpoints are slow, so POST gets the longer timeout.
            response = requests.post(url, json=data, timeout=60)
        else:
            response = requests.get(url, timeout=30)
        response_time = time.time() - start_time
        print(f"\n{'='*50}")
        print(f" {method} {endpoint}")
        print(f" Status Code: {response.status_code}")
        print(f"⏱️ Response Time: {response_time:.2f}s")
        if response.status_code == 200:
            result = response.json()
            print(f" Response:")
            print(json.dumps(result, indent=2, ensure_ascii=False))
            return result
        else:
            print(f"❌ Error: {response.status_code}")
            print(f"📋 Response: {response.text}")
            return None
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection/timeout/HTTP transport errors;
        # ValueError covers a 200 response whose body is not valid JSON.
        # Narrowed from a blanket `except Exception` so real bugs surface.
        print(f" Error testing {endpoint}: {e}")
        return None
def test_medical_conversation():
    """Exercise the multi-turn /conversation endpoint with a sample dialogue.

    Returns whatever test_endpoint returns: the decoded JSON reply on
    success, or None on failure.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(" MEDICAL CONVERSATION TEST")
    print(banner)
    # Three-turn Turkish patient/doctor exchange used as the conversation seed.
    payload = {
        "messages": [
            {"role": "user", "content": "Merhaba doktor, 3 gündür ateşim var."},
            {"role": "assistant", "content": "Merhaba. Ateşinizin kaç derece olduğunu ölçtünüz mü?"},
            {"role": "user", "content": "Evet, 38.5 derece civarında. Ayrıca boğazım da ağrıyor."},
        ],
        "max_tokens": 150,
        "temperature": 0.7,
    }
    return test_endpoint("/conversation", "POST", payload)
def performance_benchmark():
    """Run a fixed suite of Turkish symptom prompts against /chat and report timing."""
    separator = "=" * 50
    print(f"\n{separator}")
    print(" PERFORMANCE BENCHMARK")
    print(separator)
    prompts = (
        "Başım ağrıyor ve mide bulantım var.",
        "2 haftadır öksürüyorum ve balgamım var.",
        "Dizlerimde ağrı var, özellikle sabahları.",
        "Uykusuzluk problemi yaşıyorum, ne yapmalıyım?",
        "Alerjik reaksiyonum olabilir, derimde kaşıntı var.",
    )
    total_time = 0
    passed = 0
    for idx, prompt in enumerate(prompts, 1):
        print(f"\n Test Case {idx}: {prompt}")
        result = test_endpoint(
            "/chat",
            "POST",
            {"message": prompt, "max_tokens": 120, "temperature": 0.7},
        )
        # Guard clause: skip accounting when the call failed or returned no text.
        if not (result and result.get("response")):
            print(" Test failed")
            continue
        elapsed = result.get("generation_time", 0)
        total_time += elapsed
        passed += 1
        print(f" Generation Time: {elapsed:.2f}s")
        answer = result["response"]
        # Heuristic quality gate: a usable reply is longer than 20 characters
        # and does not echo the "Hasta" role marker back into the text.
        quality = "Clean" if len(answer) > 20 and "Hasta" not in answer else "Needs Improvement"
        print(f" Response Quality: {quality}")
    if passed > 0:
        avg = total_time / passed
        print(f"\n BENCHMARK RESULTS:")
        print(f" Successful Tests: {passed}/{len(prompts)}")
        print(f" Average Generation Time: {avg:.2f}s")
        rating = "Excellent" if avg < 5 else "Good" if avg < 7 else "Needs Optimization"
        print(f" Performance Rating: {rating}")
def main():
    """Drive the full L4 performance test suite against the remote API.

    Runs a health check first; only when the service reports the model as
    loaded does it proceed to the status, conversation, and benchmark calls.
    """
    print(" Turkish Medical Model API - L4 Performance Test")
    print(" GPU: Nvidia L4 24GB VRAM")
    print(" Mode: FP16 Full Precision + Flash Attention")
    # 1. Health check gates everything else.
    health_result = test_endpoint("/health")
    if health_result and health_result.get("model_loaded"):
        print("\n Model is loaded and ready!")
        # 2. Memory status
        test_endpoint("/memory-status")
        # 3. Debug info
        test_endpoint("/debug")
        # 4. Quick test
        test_endpoint("/test")
        # 5. Medical conversation test
        test_medical_conversation()
        # 6. Performance benchmark
        performance_benchmark()
    else:
        print("\n Model not ready, skipping performance tests")
        print("ℹ Wait for model to load and try again")
    print(f"\n{'='*50}")
    print(" L4 Performance Test Completed!")


# Entry guard so importing this module no longer fires live network calls.
if __name__ == "__main__":
    main()
|