import requests
import time
import logging
from typing import Dict, Optional
from utils.config import config

logger = logging.getLogger(__name__)

class HFEndpointMonitor:
    """Monitor Hugging Face endpoint status and health"""
    
    def __init__(self):
        # Clean the endpoint URL
        raw_url = config.hf_api_url or ""
        self.endpoint_url = self._clean_endpoint_url(raw_url)
        self.hf_token = config.hf_token
        self.is_initialized = False
        self.last_check = 0
        self.check_interval = 300  # Seconds between full endpoint checks (5 minutes)
        self.warmup_attempts = 0
        self.max_warmup_attempts = 3
        self.warmup_count = 0
        self.successful_requests = 0
        self.failed_requests = 0
        self.avg_response_time = 0
        
        logger.info(f"Initialized HF Monitor with URL: {self.endpoint_url}")

    def _clean_endpoint_url(self, url: str) -> str:
        """Clean and validate endpoint URL"""
        if not url:
            return ""
            
        # Remove environment variable names if present
        url = url.replace('hf_api_endpoint_url=', '')
        url = url.replace('HF_API_ENDPOINT_URL=', '')
        
        # Strip whitespace
        url = url.strip()
        
        # Ensure the URL has a scheme; default to https://
        if url and not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        
        # Remove trailing slashes (a trailing /v1 path segment is preserved)
        url = url.rstrip('/')
            
        return url

    def check_endpoint_status(self) -> Dict:
        """Check if HF endpoint is available and initialized with rate limiting"""
        current_time = time.time()
        
        # Don't check too frequently - minimum 1 minute between checks
        if current_time - self.last_check < 60:
            # Return cached status or basic status
            return {
                'available': getattr(self, '_last_available', False),
                'status_code': getattr(self, '_last_status_code', None),
                'initialized': getattr(self, '_last_initialized', False),
                'timestamp': self.last_check
            }
        
        # Proceed with actual check
        self.last_check = current_time
        
        try:
            if not self.endpoint_url or not self.hf_token:
                status_info = {
                    'available': False,
                    'status_code': None,
                    'initialized': False,
                    'error': 'URL or token not configured',
                    'timestamp': time.time()
                }
            else:
                # Properly construct the models endpoint URL
                models_url = f"{self.endpoint_url.rstrip('/')}/models"
                logger.info(f"Checking HF endpoint at: {models_url}")
                
                headers = {"Authorization": f"Bearer {self.hf_token}"}
                
                response = requests.get(
                    models_url,
                    headers=headers,
                    timeout=15
                )
                
                status_info = {
                    'available': response.status_code in [200, 201],
                    'status_code': response.status_code,
                    'initialized': self._is_endpoint_initialized(response),
                    'response_time': response.elapsed.total_seconds(),
                    'timestamp': time.time()
                }
                
                if response.status_code not in [200, 201]:
                    status_info['error'] = f"HTTP {response.status_code}: {response.text[:200]}"
                
                logger.info(f"HF Endpoint Status: {status_info}")
            
            # Cache the results
            self._last_available = status_info['available']
            self._last_status_code = status_info['status_code']
            self._last_initialized = status_info.get('initialized', False)
            
            return status_info
            
        except Exception as e:
            error_msg = str(e)
            logger.error(f"HF endpoint check failed: {error_msg}")
            
            status_info = {
                'available': False,
                'status_code': None,
                'initialized': False,
                'error': error_msg,
                'timestamp': time.time()
            }
            
            # Cache the results
            self._last_available = False
            self._last_status_code = None
            self._last_initialized = False
            
            return status_info

    def _is_endpoint_initialized(self, response) -> bool:
        """Determine if endpoint is fully initialized"""
        try:
            data = response.json()
            return 'data' in data or 'models' in data
        except ValueError:  # response body was not valid JSON
            return response.status_code in [200, 201]

    def warm_up_endpoint(self) -> bool:
        """Send a warm-up request to initialize the endpoint"""
        try:
            if not self.endpoint_url or not self.hf_token:
                logger.warning("Cannot warm up HF endpoint - URL or token not configured")
                return False
            
            self.warmup_attempts += 1
            logger.info(f"Warming up HF endpoint (attempt {self.warmup_attempts})...")
            
            headers = {
                "Authorization": f"Bearer {self.hf_token}",
                "Content-Type": "application/json"
            }
            
            # Construct proper chat completions URL
            chat_url = f"{self.endpoint_url.rstrip('/')}/chat/completions"
            logger.info(f"Sending warm-up request to: {chat_url}")
            
            payload = {
                "model": "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
                "messages": [{"role": "user", "content": "Hello"}],
                "max_tokens": 10,
                "stream": False
            }
            
            response = requests.post(
                chat_url,
                headers=headers,
                json=payload,
                timeout=45  # Longer timeout for cold start
            )
            
            success = response.status_code in [200, 201]
            if success:
                self.is_initialized = True
                self.warmup_count += 1
                self.successful_requests += 1  # count the warm-up call toward request metrics
                self.warmup_attempts = 0  # Reset on success
                logger.info("✅ HF endpoint warmed up successfully")
            else:
                self.failed_requests += 1
                logger.warning(f"⚠️ HF endpoint warm-up response: {response.status_code}")
                logger.debug(f"Response body: {response.text[:500]}")
            
            return success
            
        except Exception as e:
            logger.error(f"HF endpoint warm-up failed: {e}")
            self.failed_requests += 1
            return False

    def get_status_summary(self) -> str:
        """Get human-readable status summary"""
        status = self.check_endpoint_status()
        if status['available']:
            if status.get('initialized', False):
                return "🟢 HF Endpoint: Available and Initialized"
            else:
                return "🟡 HF Endpoint: Available but Initializing"
        else:
            return "🔴 HF Endpoint: Unavailable"

    def handle_scale_to_zero(self) -> bool:
        """Handle scale-to-zero behavior with user feedback"""
        logger.info("HF endpoint appears to be scaled to zero. Attempting to wake it up...")
        
        # Try to warm up the endpoint
        for attempt in range(self.max_warmup_attempts):
            logger.info(f"Wake-up attempt {attempt + 1}/{self.max_warmup_attempts}")
            if self.warm_up_endpoint():
                logger.info("✅ HF endpoint successfully woken up!")
                return True
            time.sleep(10)  # Wait between attempts
        
        logger.error("❌ Failed to wake up HF endpoint after all attempts")
        return False

    def get_detailed_status(self) -> Dict:
        """Get detailed HF endpoint status with metrics"""
        try:
            headers = {"Authorization": f"Bearer {self.hf_token}"}
            
            # Get model info
            models_url = f"{self.endpoint_url.rstrip('/')}/models"
            model_response = requests.get(
                models_url,
                headers=headers,
                timeout=10
            )
            
            # Get endpoint info if available
            endpoint_info = {}
            try:
                info_url = f"{self.endpoint_url.rstrip('/')}/info"
                info_response = requests.get(
                    info_url,
                    headers=headers,
                    timeout=10
                )
                if info_response.status_code == 200:
                    endpoint_info = info_response.json()
            except (requests.RequestException, ValueError):
                pass  # the /info endpoint is optional; ignore failures
            
            status_info = {
                'available': model_response.status_code == 200,
                'status_code': model_response.status_code,
                'initialized': self._is_endpoint_initialized(model_response),
                'endpoint_info': endpoint_info,
                'last_checked': time.time(),
                'warmup_attempts': getattr(self, 'warmup_attempts', 0),
                'is_warming_up': getattr(self, 'is_warming_up', False)
            }
            
            return status_info
            
        except Exception as e:
            return {
                'available': False,
                'status_code': None,
                'initialized': False,
                'error': str(e),
                'last_checked': time.time()
            }

    def get_performance_metrics(self) -> Dict:
        """Get HF endpoint performance metrics"""
        return {
            'warmup_count': getattr(self, 'warmup_count', 0),
            'successful_requests': getattr(self, 'successful_requests', 0),
            'failed_requests': getattr(self, 'failed_requests', 0),
            'average_response_time': getattr(self, 'avg_response_time', 0)
        }

    # Enhanced status and engagement tracking
    def get_enhanced_status(self) -> Dict:
        """Get enhanced HF endpoint status with engagement tracking"""
        basic_status = self.check_endpoint_status()
        
        return {
            **basic_status,
            "engagement_level": self._determine_engagement_level(),
            "last_engagement": getattr(self, '_last_engagement_time', None),
            "total_engagements": getattr(self, '_total_engagements', 0),
            "current_research_topic": getattr(self, '_current_research_topic', None)
        }

    def _determine_engagement_level(self) -> str:
        """Determine current engagement level"""
        if not self.is_initialized:
            return "idle"
        elif getattr(self, '_currently_analyzing', False):
            return "analyzing"
        elif getattr(self, '_pending_research', False):
            return "research_pending"
        else:
            return "ready"

    def start_hf_analysis(self, topic: Optional[str] = None):
        """Start HF analysis with topic tracking"""
        self._currently_analyzing = True
        self._last_engagement_time = time.time()
        self._total_engagements = getattr(self, '_total_engagements', 0) + 1
        if topic:
            self._current_research_topic = topic

    def finish_hf_analysis(self):
        """Finish HF analysis"""
        self._currently_analyzing = False
        self._current_research_topic = None

# Global instance
hf_monitor = HFEndpointMonitor()
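
# --- Minimal usage sketch (illustrative, not part of the original module) ---
# Assumes utils.config supplies a valid hf_api_url and hf_token; it only
# exercises the public methods of the global hf_monitor instance defined above.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # One-off, rate-limited status check
    print(hf_monitor.get_status_summary())

    # If the endpoint is scaled to zero, try to wake it before sending traffic
    if not hf_monitor.check_endpoint_status()["available"]:
        hf_monitor.handle_scale_to_zero()

    # Track an analysis session around endpoint usage
    hf_monitor.start_hf_analysis(topic="demo run")
    print(hf_monitor.get_enhanced_status())
    hf_monitor.finish_hf_analysis()

    print(hf_monitor.get_performance_metrics())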