|
import requests |
|
import json |
|
import time |
|
|
|
|
|
BASE_URL = "https://conquerorr000-turkish-medical-model-api.hf.space" |
|
|
|
def test_endpoint(endpoint, method="GET", data=None):
    """Send one request to an API endpoint and pretty-print the outcome.

    Args:
        endpoint: Path appended to BASE_URL (e.g. "/health").
        method: "GET" or "POST"; any value other than "POST" is sent as GET.
        data: JSON-serializable payload for POST requests; ignored for GET.

    Returns:
        The decoded JSON body on HTTP 200, otherwise None (non-200 status,
        network failure, or an unparseable response body).
    """
    url = f"{BASE_URL}{endpoint}"

    try:
        start_time = time.time()

        if method == "POST":
            # POST endpoints drive model generation, so allow a longer timeout.
            response = requests.post(url, json=data, timeout=60)
        else:
            response = requests.get(url, timeout=30)

        response_time = time.time() - start_time

        print(f"\n{'='*50}")
        print(f" {method} {endpoint}")
        print(f" Status Code: {response.status_code}")
        print(f"⏱️ Response Time: {response_time:.2f}s")

        if response.status_code == 200:
            result = response.json()
            print(" Response:")
            print(json.dumps(result, indent=2, ensure_ascii=False))
            return result
        else:
            print(f"❌ Error: {response.status_code}")
            print(f"📋 Response: {response.text}")
            return None

    # Catch only transport failures and JSON decode errors (requests'
    # JSONDecodeError subclasses ValueError); let programming errors
    # such as NameError/TypeError surface instead of being swallowed.
    except (requests.RequestException, ValueError) as e:
        print(f" Error testing {endpoint}: {e}")
        return None
|
|
|
def test_medical_conversation():
    """Exercise the /conversation endpoint with a multi-turn Turkish dialogue.

    Returns whatever test_endpoint returns: the decoded JSON response on
    success, or None on failure.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(" MEDICAL CONVERSATION TEST")
    print(banner)

    # (role, content) turns of the scripted patient/doctor exchange.
    dialogue = [
        ("user", "Merhaba doktor, 3 gündür ateşim var."),
        ("assistant", "Merhaba. Ateşinizin kaç derece olduğunu ölçtünüz mü?"),
        ("user", "Evet, 38.5 derece civarında. Ayrıca boğazım da ağrıyor."),
    ]

    payload = {
        "messages": [{"role": role, "content": content} for role, content in dialogue],
        "max_tokens": 150,
        "temperature": 0.7,
    }

    return test_endpoint("/conversation", "POST", payload)
|
|
|
def performance_benchmark():
    """Benchmark the /chat endpoint over a fixed set of Turkish prompts.

    Prints per-case generation time and a rough quality flag, then a
    summary with the average generation time and a performance rating.
    """
    bar = "=" * 50
    print(f"\n{bar}")
    print(" PERFORMANCE BENCHMARK")
    print(bar)

    prompts = [
        "Başım ağrıyor ve mide bulantım var.",
        "2 haftadır öksürüyorum ve balgamım var.",
        "Dizlerimde ağrı var, özellikle sabahları.",
        "Uykusuzluk problemi yaşıyorum, ne yapmalıyım?",
        "Alerjik reaksiyonum olabilir, derimde kaşıntı var.",
    ]

    elapsed_total = 0
    passed = 0

    for index, prompt in enumerate(prompts, 1):
        print(f"\n Test Case {index}: {prompt}")

        result = test_endpoint(
            "/chat",
            "POST",
            {"message": prompt, "max_tokens": 120, "temperature": 0.7},
        )

        if not (result and result.get("response")):
            print(" Test failed")
            continue

        gen_time = result.get("generation_time", 0)
        elapsed_total += gen_time
        passed += 1
        print(f" Generation Time: {gen_time:.2f}s")

        # Heuristic quality gate: long enough and free of the "Hasta"
        # role-leak artifact counts as clean.
        answer = result["response"]
        is_clean = len(answer) > 20 and "Hasta" not in answer
        quality = "Clean" if is_clean else "Needs Improvement"
        print(f" Response Quality: {quality}")

    if passed > 0:
        avg_time = elapsed_total / passed
        print("\n BENCHMARK RESULTS:")
        print(f" Successful Tests: {passed}/{len(prompts)}")
        print(f" Average Generation Time: {avg_time:.2f}s")
        if avg_time < 5:
            rating = "Excellent"
        elif avg_time < 7:
            rating = "Good"
        else:
            rating = "Needs Optimization"
        print(f" Performance Rating: {rating}")
|
|
|
|
|
def main():
    """Run the full L4 performance test suite against the deployed API.

    Checks /health first; only when the model reports loaded does it hit
    the diagnostic endpoints and run the conversation and benchmark tests.
    """
    print(" Turkish Medical Model API - L4 Performance Test")
    print(" GPU: Nvidia L4 24GB VRAM")
    print(" Mode: FP16 Full Precision + Flash Attention")

    health_result = test_endpoint("/health")

    if health_result and health_result.get("model_loaded"):
        print("\n Model is loaded and ready!")

        # Cheap diagnostic endpoints first, then the heavier generation tests.
        test_endpoint("/memory-status")
        test_endpoint("/debug")
        test_endpoint("/test")

        test_medical_conversation()
        performance_benchmark()
    else:
        print("\n Model not ready, skipping performance tests")
        print("ℹ Wait for model to load and try again")

    print(f"\n{'='*50}")
    print(" L4 Performance Test Completed!")


# Guard the entry point so importing this module does not fire live
# network requests; running it as a script behaves exactly as before.
if __name__ == "__main__":
    main()
|
|