File size: 9,967 Bytes
19aaa42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#!/usr/bin/env python3
"""
Test Suite for Maternal Health Vector Store
Validates search functionality, medical context filtering, and performance
"""

import unittest
import time
from pathlib import Path
from vector_store_manager import MaternalHealthVectorStore, SearchResult

class TestMaternalHealthVectorStore(unittest.TestCase):
    """Integration tests for ``MaternalHealthVectorStore``.

    One store instance is built in ``setUpClass`` and shared by every test
    method, so all tests run against the same index.
    """

    @classmethod
    def setUpClass(cls):
        """Build the shared vector store, preferring an already existing index.

        When ``index_file`` exists the index is loaded via
        ``load_existing_index()``; if that call reports failure, or no index
        file exists, a new index is built with ``create_vector_index()``.
        """
        cls.vector_store = MaternalHealthVectorStore()

        # Load existing vector store (should exist from previous run)
        if cls.vector_store.index_file.exists():
            print("Loading existing vector store for testing...")
            success = cls.vector_store.load_existing_index()
            if not success:
                # Fall back to rebuilding so the suite can still run.
                print("Failed to load existing index, creating new one...")
                cls.vector_store.create_vector_index()
        else:
            print("Creating vector store for testing...")
            cls.vector_store.create_vector_index()

    def test_vector_store_initialization(self):
        """The index exists, is non-empty, and documents line up with metadata."""
        self.assertIsNotNone(self.vector_store.index)
        self.assertGreater(self.vector_store.index.ntotal, 0)
        # Every document is expected to have exactly one metadata entry.
        self.assertEqual(len(self.vector_store.documents), len(self.vector_store.metadata))

    def test_basic_search_functionality(self):
        """A plain search returns between 1 and ``k`` on-topic ``SearchResult`` items."""
        query = "magnesium sulfate dosage for preeclampsia"
        results = self.vector_store.search(query, k=3)

        # Should return results, but never more than the requested k
        self.assertGreater(len(results), 0)
        self.assertLessEqual(len(results), 3)

        # All results should be SearchResult objects with a positive score
        # whose content mentions the drug named in the query.
        for result in results:
            self.assertIsInstance(result, SearchResult)
            self.assertGreater(result.score, 0)
            self.assertIn('magnesium', result.content.lower())

    def test_medical_context_filtering(self):
        """Filtering by content type and minimum importance is honoured."""
        query = "emergency management protocols"

        # Test filtering by emergency content
        emergency_results = self.vector_store.search_by_medical_context(
            query,
            content_types=['emergency'],
            min_importance=0.8,
            k=5
        )

        # Should return emergency-specific results.
        # NOTE(review): this loop passes vacuously when no results come back;
        # only the properties of returned results are checked.
        for result in emergency_results:
            self.assertEqual(result.chunk_type, 'emergency')
            self.assertGreaterEqual(result.clinical_importance, 0.8)

    def test_clinical_importance_filtering(self):
        """Every result respects the requested ``min_importance`` threshold."""
        query = "dosage recommendations"

        # Test high importance filtering
        high_importance_results = self.vector_store.search_by_medical_context(
            query,
            min_importance=0.9,
            k=10
        )

        # All results should have high clinical importance.
        # NOTE(review): an empty result list also satisfies this check.
        for result in high_importance_results:
            self.assertGreaterEqual(result.clinical_importance, 0.9)

    def test_search_performance(self):
        """A single search completes in under one second and returns results."""
        query = "normal labor management guidelines"

        # perf_counter() is a monotonic, high-resolution clock, so the elapsed
        # time cannot be distorted by system clock adjustments the way a
        # time.time() difference can.
        start_time = time.perf_counter()
        results = self.vector_store.search(query, k=5)
        search_time = time.perf_counter() - start_time

        # Search should be fast (under 1 second)
        self.assertLess(search_time, 1.0)
        self.assertGreater(len(results), 0)

    def test_maternal_health_queries(self):
        """Domain queries return a well-scored top hit mentioning an expected keyword.

        Each case runs in its own ``subTest`` so one failing query does not
        hide the outcome of the others.
        """

        test_cases = [
            {
                'query': 'postpartum hemorrhage management',
                'expected_keywords': ['hemorrhage', 'postpartum', 'bleeding'],
                'min_score': 0.3
            },
            {
                'query': 'fetal heart rate monitoring',
                'expected_keywords': ['fetal', 'heart', 'rate', 'monitoring'],
                'min_score': 0.3
            },
            {
                'query': 'preeclampsia treatment protocols',
                'expected_keywords': ['preeclampsia', 'treatment', 'protocol'],
                'min_score': 0.3
            }
        ]

        for case in test_cases:
            with self.subTest(query=case['query']):
                results = self.vector_store.search(case['query'], k=3)

                # Should return results
                self.assertGreater(len(results), 0)

                # Check relevance of the first (top) result only
                best_result = results[0]
                self.assertGreaterEqual(best_result.score, case['min_score'])

                # Check if keywords appear in results: a single keyword
                # anywhere in the combined, lower-cased content is enough.
                combined_content = ' '.join([r.content.lower() for r in results])
                keyword_found = any(
                    keyword in combined_content
                    for keyword in case['expected_keywords']
                )
                self.assertTrue(keyword_found,
                               f"No keywords {case['expected_keywords']} found in results")

    def test_statistics_functionality(self):
        """``get_statistics()`` exposes the required fields with sane values."""
        stats = self.vector_store.get_statistics()

        # Check required fields
        required_fields = [
            'total_chunks', 'embedding_dimension', 'embedding_model',
            'chunk_type_distribution', 'clinical_importance_distribution'
        ]

        for field in required_fields:
            self.assertIn(field, stats)

        # Check values make sense: a non-empty store, a 384-dimensional
        # embedding, and a model name containing 'all-MiniLM-L6-v2'.
        self.assertGreater(stats['total_chunks'], 0)
        self.assertEqual(stats['embedding_dimension'], 384)
        self.assertIn('all-MiniLM-L6-v2', stats['embedding_model'])

    def test_dosage_information_retrieval(self):
        """Context-filtered dosage queries return content with dosage vocabulary.

        Each case restricts the search to its ``content_types`` and passes when
        at least one of its ``dosage_terms`` occurs in the combined results.
        """
        dosage_queries = [
            {
                'query': "oxytocin dosage for labor induction",
                'content_types': ['dosage', 'emergency', 'maternal', 'procedure'],  # Include maternal and procedure
                'dosage_terms': ['oxytocin', 'administration', 'dose', 'mg', 'ml', 'unit', 'continuous']
            },
            {
                'query': "antibiotic prophylaxis dosing",
                'content_types': ['dosage', 'emergency'],
                'dosage_terms': ['mg', 'ml', 'dose', 'dosage', 'antibiotic', 'prophylaxis']
            },
            {
                'query': "magnesium sulfate administration",
                'content_types': ['dosage', 'emergency'],
                'dosage_terms': ['magnesium', 'sulfate', 'mg', 'dose', 'administration']
            }
        ]

        for case in dosage_queries:
            with self.subTest(query=case['query']):
                results = self.vector_store.search_by_medical_context(
                    case['query'],
                    content_types=case['content_types'],
                    k=3
                )

                # Should find dosage-related content
                self.assertGreater(len(results), 0)

                # Check for dosage-related terms (substring match on the
                # lower-cased, concatenated content of all results)
                combined_content = ' '.join([r.content.lower() for r in results])

                term_found = any(term in combined_content for term in case['dosage_terms'])
                self.assertTrue(term_found,
                               f"No dosage terms {case['dosage_terms']} found for query: {case['query']}")

    def test_edge_cases(self):
        """Unusual inputs still yield a list and never exceed the requested ``k``."""

        # Empty query
        results = self.vector_store.search("", k=1)
        self.assertIsInstance(results, list)

        # Very specific query that might not match well
        results = self.vector_store.search("xyz unknown medical term", k=1)
        self.assertIsInstance(results, list)

        # Large k value: at most k results may be returned
        results = self.vector_store.search("pregnancy", k=100)
        self.assertLessEqual(len(results), 100)

def _print_problem_digest(heading, entries, fallback):
    """Print *heading* followed by the last traceback line of each entry.

    Args:
        heading: Title line printed before the entries.
        entries: Iterable of ``(test, traceback_text)`` pairs, as found in
            ``TestResult.failures`` / ``TestResult.errors``.
        fallback: Text shown when a traceback is empty after stripping.

    Nothing is printed when *entries* is empty.
    """
    if not entries:
        return

    print(heading)
    for test, traceback_text in entries:
        stripped = traceback_text.strip()
        # str.split('\n') never returns an empty list, so emptiness has to be
        # checked on the text itself for the fallback to be reachable.
        error_line = stripped.rsplit('\n', 1)[-1] if stripped else fallback
        print(f"  - {test}: {error_line}")


def run_comprehensive_tests():
    """Run the ``TestMaternalHealthVectorStore`` suite and print a report.

    The tests are loaded from the test case class, executed with a verbose
    ``unittest.TextTestRunner``, and followed by a summary of the run,
    failure and error counts. On an unsuccessful run, the last traceback
    line of every failure and error is listed.

    Returns:
        bool: The value of ``result.wasSuccessful()`` for the run.
    """
    print("🧪 Running Comprehensive Vector Store Tests...")
    print("=" * 60)

    # Create test suite
    loader = unittest.TestLoader()
    suite = loader.loadTestsFromTestCase(TestMaternalHealthVectorStore)

    # Run tests with detailed output
    runner = unittest.TextTestRunner(verbosity=2)
    result = runner.run(suite)

    # Print summary
    print("\n" + "=" * 60)
    print("📊 TEST SUMMARY:")
    print(f"  Tests run: {result.testsRun}")
    print(f"  Failures: {len(result.failures)}")
    print(f"  Errors: {len(result.errors)}")

    if result.wasSuccessful():
        print("✅ ALL TESTS PASSED! Vector store is working perfectly.")
    else:
        print("❌ Some tests failed. Check output above for details.")

        # Failures and errors share one digest format; only the heading and
        # the fallback text differ.
        _print_problem_digest("\nFailures:", result.failures, "Unknown failure")
        _print_problem_digest("\nErrors:", result.errors, "Unknown error")

    return result.wasSuccessful()

if __name__ == "__main__":
    # Exit status mirrors the test outcome: 0 on success, 1 otherwise.
    # SystemExit is raised directly because the bare exit() helper is added by
    # the site module for interactive use and may be missing (e.g. python -S).
    success = run_comprehensive_tests()
    raise SystemExit(0 if success else 1)