# turkish-medical-model-api / scripts / enhanced_test_l4.py
# Author: Conquerorr0
# Commit dc4fe0a — feat: L4 optimization deployment - 2025-07-25 10:08
import requests
import json
import time
# Test Turkish Medical Model API - L4 Optimized
BASE_URL = "https://conquerorr000-turkish-medical-model-api.hf.space"
def test_endpoint(endpoint, method="GET", data=None):
    """Call one API endpoint, print a formatted report, and return its JSON.

    Args:
        endpoint: Path appended to BASE_URL (e.g. "/health").
        method: "POST" sends *data* as a JSON body (60s timeout);
            anything else issues a GET (30s timeout).
        data: Optional JSON-serializable payload for POST requests.

    Returns:
        The decoded JSON body on HTTP 200; None on any other status
        or on a network/decoding error (which is caught and printed).
    """
    url = f"{BASE_URL}{endpoint}"
    try:
        t0 = time.time()
        if method == "POST":
            resp = requests.post(url, json=data, timeout=60)
        else:
            resp = requests.get(url, timeout=30)
        elapsed = time.time() - t0
        print(f"\n{'='*50}")
        print(f" {method} {endpoint}")
        print(f" Status Code: {resp.status_code}")
        print(f"⏱️ Response Time: {elapsed:.2f}s")
        # Non-200: report and bail out early.
        if resp.status_code != 200:
            print(f"❌ Error: {resp.status_code}")
            print(f"📋 Response: {resp.text}")
            return None
        body = resp.json()
        print(f" Response:")
        print(json.dumps(body, indent=2, ensure_ascii=False))
        return body
    except Exception as e:
        # Broad catch is deliberate: this is a smoke-test script and any
        # failure (DNS, timeout, bad JSON) should just be reported.
        print(f" Error testing {endpoint}: {e}")
        return None
def test_medical_conversation():
    """Run a scripted multi-turn Turkish medical chat through /conversation.

    Returns whatever test_endpoint returns (decoded JSON or None).
    """
    print(f"\n{'='*50}")
    print(" MEDICAL CONVERSATION TEST")
    print(f"{'='*50}")
    # Pre-scripted patient/doctor exchange; the model continues after the
    # last user turn.
    payload = {
        "messages": [
            {"role": "user", "content": "Merhaba doktor, 3 gündür ateşim var."},
            {"role": "assistant", "content": "Merhaba. Ateşinizin kaç derece olduğunu ölçtünüz mü?"},
            {"role": "user", "content": "Evet, 38.5 derece civarında. Ayrıca boğazım da ağrıyor."},
        ],
        "max_tokens": 150,
        "temperature": 0.7,
    }
    return test_endpoint("/conversation", "POST", payload)
def performance_benchmark():
    """Send five Turkish symptom prompts to /chat and summarize latency.

    Accumulates the server-reported generation_time of each successful call
    and prints averages plus a coarse performance rating at the end.
    """
    print(f"\n{'='*50}")
    print(" PERFORMANCE BENCHMARK")
    print(f"{'='*50}")
    prompts = [
        "Başım ağrıyor ve mide bulantım var.",
        "2 haftadır öksürüyorum ve balgamım var.",
        "Dizlerimde ağrı var, özellikle sabahları.",
        "Uykusuzluk problemi yaşıyorum, ne yapmalıyım?",
        "Alerjik reaksiyonum olabilir, derimde kaşıntı var.",
    ]
    elapsed_sum = 0
    ok_count = 0
    for i, prompt in enumerate(prompts, 1):
        print(f"\n Test Case {i}: {prompt}")
        reply = test_endpoint("/chat", "POST", {
            "message": prompt,
            "max_tokens": 120,
            "temperature": 0.7,
        })
        # A call only counts if it returned JSON with a non-empty "response".
        if not (reply and reply.get("response")):
            print(" Test failed")
            continue
        gen_time = reply.get("generation_time", 0)
        elapsed_sum += gen_time
        ok_count += 1
        print(f" Generation Time: {gen_time:.2f}s")
        answer = reply["response"]
        # Heuristic quality check: long enough and free of the "Hasta"
        # role-label artifact the model sometimes emits.
        quality = "Clean" if len(answer) > 20 and "Hasta" not in answer else "Needs Improvement"
        print(f" Response Quality: {quality}")
    if ok_count:
        avg = elapsed_sum / ok_count
        rating = "Excellent" if avg < 5 else "Good" if avg < 7 else "Needs Optimization"
        print(f"\n BENCHMARK RESULTS:")
        print(f" Successful Tests: {ok_count}/{len(prompts)}")
        print(f" Average Generation Time: {avg:.2f}s")
        print(f" Performance Rating: {rating}")
def main():
    """Run the full L4 smoke/performance suite against the deployed API.

    Performs a /health check first and only runs the diagnostic,
    conversation, and benchmark tests when the model reports loaded.
    """
    print(" Turkish Medical Model API - L4 Performance Test")
    print(" GPU: Nvidia L4 24GB VRAM")
    print(" Mode: FP16 Full Precision + Flash Attention")
    # 1. Health check gates everything else: no loaded model, no benchmarks.
    health_result = test_endpoint("/health")
    if health_result and health_result.get("model_loaded"):
        print("\n Model is loaded and ready!")
        # 2. Memory status
        test_endpoint("/memory-status")
        # 3. Debug info
        test_endpoint("/debug")
        # 4. Quick generation smoke test
        test_endpoint("/test")
        # 5. Multi-turn medical conversation test
        test_medical_conversation()
        # 6. Latency benchmark over sample prompts
        performance_benchmark()
    else:
        print("\n Model not ready, skipping performance tests")
        print("ℹ Wait for model to load and try again")
    print(f"\n{'='*50}")
    print(" L4 Performance Test Completed!")


# Guarded entry point so importing this module doesn't fire network calls.
if __name__ == "__main__":
    main()