# File size: 4,386 Bytes
# dc4fe0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
import requests
import json
import time

# Test Turkish Medical Model API - L4 Optimized
# Root URL of the API under test; test_endpoint() appends each endpoint path
# (such as "/health" or "/chat") directly to this string.
BASE_URL = "https://conquerorr000-turkish-medical-model-api.hf.space"

def test_endpoint(endpoint, method="GET", data=None):
    """Call one API endpoint, print a short report, and return its JSON body.

    Args:
        endpoint: Path appended to ``BASE_URL`` to form the request URL.
        method: ``"POST"`` sends ``data`` through the ``json=`` argument with
            a 60 second timeout; any other value issues a GET with a
            30 second timeout.
        data: Payload for POST requests; ignored for GET.

    Returns:
        The decoded JSON response when the status code is 200, otherwise
        ``None``. ``None`` is also returned when any exception is raised
        while sending the request or handling the response (the exception
        is printed, not propagated).
    """
    url = f"{BASE_URL}{endpoint}"

    try:
        started_at = time.time()
        response = (
            requests.post(url, json=data, timeout=60)
            if method == "POST"
            else requests.get(url, timeout=30)
        )
        elapsed = time.time() - started_at

        # Report header: separator, request line, status and wall-clock time.
        print("\n" + "=" * 50)
        print(f" {method} {endpoint}")
        print(f" Status Code: {response.status_code}")
        print(f"⏱️ Response Time: {elapsed:.2f}s")

        # Anything other than 200 is reported with the raw body and dropped.
        if response.status_code != 200:
            print(f"❌ Error: {response.status_code}")
            print(f"📋 Response: {response.text}")
            return None

        # Decode before printing the label so a non-JSON body goes straight
        # to the exception handler without a dangling "Response:" line.
        payload = response.json()
        print(" Response:")
        print(json.dumps(payload, indent=2, ensure_ascii=False))
        return payload

    except Exception as exc:
        # Best-effort script: report the failure and let the caller continue.
        print(f" Error testing {endpoint}: {exc}")
        return None

def test_medical_conversation():
    """Send a fixed three-message dialogue to the ``/conversation`` endpoint.

    Prints a section banner, then POSTs a user/assistant/user message list
    (with ``max_tokens`` 150 and ``temperature`` 0.7) through
    ``test_endpoint``.

    Returns:
        Whatever ``test_endpoint`` returns for the request: the decoded JSON
        body on success, or ``None``.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(" MEDICAL CONVERSATION TEST")
    print(banner)

    # (role, content) pairs in the order they are sent to the API.
    turns = (
        ("user", "Merhaba doktor, 3 gündür ateşim var."),
        ("assistant", "Merhaba. Ateşinizin kaç derece olduğunu ölçtünüz mü?"),
        ("user", "Evet, 38.5 derece civarında. Ayrıca boğazım da ağrıyor."),
    )

    payload = {
        "messages": [{"role": role, "content": text} for role, text in turns],
        "max_tokens": 150,
        "temperature": 0.7,
    }

    return test_endpoint("/conversation", "POST", payload)

def performance_benchmark():
    """Send five fixed prompts to ``/chat`` and print timing statistics.

    Each prompt is POSTed through ``test_endpoint`` with ``max_tokens`` 120
    and ``temperature`` 0.7. A case counts as successful when the result is
    truthy and has a truthy ``"response"`` entry; its ``"generation_time"``
    (0 when the key is absent) is added to the running total. A summary with
    the success count, the average generation time and a rating is printed
    only when at least one case succeeded.

    Returns:
        None. All results are written to stdout.
    """
    rule = "=" * 50
    print(f"\n{rule}")
    print(" PERFORMANCE BENCHMARK")
    print(rule)

    prompts = [
        "Başım ağrıyor ve mide bulantım var.",
        "2 haftadır öksürüyorum ve balgamım var.",
        "Dizlerimde ağrı var, özellikle sabahları.",
        "Uykusuzluk problemi yaşıyorum, ne yapmalıyım?",
        "Alerjik reaksiyonum olabilir, derimde kaşıntı var.",
    ]

    time_sum = 0
    passed = 0

    for number, prompt in enumerate(prompts, 1):
        print(f"\n Test Case {number}: {prompt}")

        result = test_endpoint(
            "/chat",
            "POST",
            {"message": prompt, "max_tokens": 120, "temperature": 0.7},
        )

        # No usable reply: report and move on to the next prompt.
        if not (result and result.get("response")):
            print(" Test failed")
            continue

        generation_time = result.get("generation_time", 0)
        time_sum += generation_time
        passed += 1
        print(f" Generation Time: {generation_time:.2f}s")

        # A reply is labelled "Clean" when it is longer than 20 characters
        # and does not contain the substring "Hasta".
        if len(result["response"]) > 20 and "Hasta" not in result["response"]:
            quality = "Clean"
        else:
            quality = "Needs Improvement"
        print(f" Response Quality: {quality}")

    # Without a single success there is no average to report.
    if passed <= 0:
        return

    avg_time = time_sum / passed
    if avg_time < 5:
        rating = "Excellent"
    elif avg_time < 7:
        rating = "Good"
    else:
        rating = "Needs Optimization"

    print("\n BENCHMARK RESULTS:")
    print(f" Successful Tests: {passed}/{len(prompts)}")
    print(f" Average Generation Time: {avg_time:.2f}s")
    print(f" Performance Rating: {rating}")

# Script body: print the banner, gate on the health check, then run the suite.
print(" Turkish Medical Model API - L4 Performance Test")
print(" GPU: Nvidia L4 24GB VRAM")
print(" Mode: FP16 Full Precision + Flash Attention")

# Step 1: health check. The remaining steps run only when the response
# carries a truthy "model_loaded" entry.
health_result = test_endpoint("/health")

if not (health_result and health_result.get("model_loaded")):
    print("\n Model not ready, skipping performance tests")
    print("ℹ Wait for model to load and try again")
else:
    print("\n Model is loaded and ready!")

    # Steps 2-4: memory status, debug info and the quick built-in test.
    test_endpoint("/memory-status")
    test_endpoint("/debug")
    test_endpoint("/test")

    # Step 5: multi-turn medical conversation.
    test_medical_conversation()

    # Step 6: timing benchmark over the fixed prompts.
    performance_benchmark()

print("\n" + "=" * 50)
print(" L4 Performance Test Completed!")