import logging
import time
from typing import Dict

import requests

from utils.config import config

logger = logging.getLogger(__name__)


class HFEndpointMonitor:
    """Monitor Hugging Face endpoint status and health.

    The monitor probes ``<endpoint_url>/models`` with the configured bearer
    token and maps the outcome onto a small status dict with the keys
    ``status``, ``message``, ``available``, ``initializing`` and ``detailed``.
    Results are cached briefly so that repeated UI refreshes do not hit the
    endpoint on every call.
    """

    # Seconds a cached status is served before the endpoint is probed again.
    _QUICK_CACHE_TTL = 60
    # Timeout (seconds) for the status probe; kept short for a quick response.
    _STATUS_TIMEOUT = 10
    # Timeout (seconds) for the wake-up request; cold starts are slow.
    _WAKE_TIMEOUT = 60
    # Model name sent in the minimal wake-up chat request.
    _WAKE_MODEL = "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"

    def __init__(self):
        """Read endpoint URL and token from ``config`` and reset cache state."""
        self.endpoint_url = config.hf_api_url.rstrip('/') if config.hf_api_url else ""
        self.hf_token = config.hf_token
        self.last_check = 0
        self.check_interval = 300  # 5 minutes
        self._cached_status = None
        self._last_detailed_check = 0

    @staticmethod
    def _make_status(status: str, message: str, available: bool = False,
                     initializing: bool = False, detailed: bool = True) -> Dict:
        """Build a status dict with the fixed set of keys used by this class."""
        return {
            "status": status,
            "message": message,
            "available": available,
            "initializing": initializing,
            "detailed": detailed,
        }

    @staticmethod
    def _is_initializing(status: Dict) -> bool:
        """Return True when the status message mentions "initializing"."""
        return "initializing" in status.get("message", "").lower()

    def get_endpoint_status(self) -> Dict:
        """Get current HF endpoint status.

        Returns:
            A fresh dict (safe for the caller to mutate) with the keys
            ``status``, ``message``, ``available``, ``initializing`` and
            ``detailed``. ``status`` is one of ``not_configured``,
            ``available``, ``scaled_to_zero``, ``timeout`` or ``error``.
        """
        current_time = time.time()

        # Serve the cached status when it was checked recently, but still
        # force a real probe once the detailed-check interval has elapsed.
        if (
            self._cached_status
            and current_time - self.last_check < self._QUICK_CACHE_TTL
            and current_time - self._last_detailed_check < self.check_interval
        ):
            # Return a copy so callers cannot corrupt the cached entry.
            return dict(self._cached_status)

        self.last_check = current_time

        # Without URL and token there is nothing to probe.
        if not self.endpoint_url or not self.hf_token:
            status = self._make_status(
                "not_configured", "HF endpoint not configured", detailed=False
            )
            self._cached_status = status
            return dict(status)

        try:
            response = requests.get(
                f"{self.endpoint_url}/models",
                headers={"Authorization": f"Bearer {self.hf_token}"},
                timeout=self._STATUS_TIMEOUT,
            )
        except requests.exceptions.Timeout:
            status = self._make_status(
                "timeout",
                "HF endpoint timeout (may be initializing)",
                initializing=True,
            )
        except Exception as e:
            # Best-effort monitor: any other failure is reported, not raised.
            status = self._make_status("error", f"HF endpoint error: {e}")
        else:
            if response.status_code in (200, 201):
                status = self._make_status(
                    "available", "HF endpoint is ready", available=True
                )
            elif response.status_code == 503:
                # This monitor treats HTTP 503 as "scaled to zero".
                status = self._make_status(
                    "scaled_to_zero", "HF endpoint is scaled to zero"
                )
            else:
                status = self._make_status(
                    "error", f"HF endpoint error: {response.status_code}"
                )

        self._cached_status = status
        self._last_detailed_check = current_time
        return dict(status)

    def get_human_readable_status(self) -> str:
        """Get human-readable status message.

        Returns:
            A one-line, emoji-prefixed description of the current status.
        """
        status = self.get_endpoint_status()

        # NOTE(review): the timestamp below is a hard-coded snapshot copied
        # from endpoint logs, not live data. Because the timeout message
        # contains "initializing", this branch also shadows the "timeout"
        # entry in the table below.
        if self._is_initializing(status):
            return "⏳ HF Endpoint: Initializing replica (started Sep 09, 22:15:24)"

        status_messages = {
            "not_configured": "🟡 HF Endpoint: Not configured",
            "available": "🟢 HF Endpoint: Available and ready",
            "scaled_to_zero": "🔴 HF Endpoint: Scaled to zero (send message to wake up)",
            "timeout": "⏳ HF Endpoint: Initializing (may take 4 minutes)",
            "error": f"❌ HF Endpoint: Error - {status.get('message', 'Unknown error')}",
        }

        return status_messages.get(
            status["status"],
            f"⚪ HF Endpoint: {status.get('message', 'Unknown status')}",
        )

    def get_detailed_status(self) -> Dict:
        """Get detailed status information.

        Returns:
            A new dict with the keys from :meth:`get_endpoint_status`; when
            the status message mentions "initializing" the keys ``details``,
            ``eta`` and ``action`` are added. The cached status is never
            modified.
        """
        # Work on a private copy so the extra keys do not leak into the cache.
        status = dict(self.get_endpoint_status())

        # NOTE(review): replica id and start time are a hard-coded snapshot
        # copied from endpoint logs, not live data.
        if self._is_initializing(status):
            status.update({
                "details": "Replica UIVI6 downloading - Started Sep 09, 22:15:24",
                "eta": "Initialization may take 2-4 minutes",
                "action": "Please wait for initialization to complete",
            })

        return status

    def attempt_wake_up(self) -> bool:
        """Attempt to wake up the HF endpoint.

        Sends a minimal chat-completion request to
        ``<endpoint_url>/chat/completions``.

        Returns:
            True when the endpoint answered with HTTP 200 or 201; False when
            the endpoint is not configured, answered with another status
            code, or the request failed.
        """
        if not self.endpoint_url or not self.hf_token:
            return False

        headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/json",
        }
        # Smallest useful request: one short message, a few tokens, no stream.
        payload = {
            "model": self._WAKE_MODEL,
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 10,
            "stream": False,
        }

        try:
            response = requests.post(
                f"{self.endpoint_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=self._WAKE_TIMEOUT,
            )
        except Exception as e:
            logger.warning("Failed to wake up HF endpoint: %s", e)
            return False

        woke_up = response.status_code in (200, 201)
        if woke_up:
            # Drop the cached status so the next status call re-probes the
            # endpoint instead of reporting the pre-wake-up state.
            self._cached_status = None
        return woke_up

    def get_initialization_progress(self) -> str:
        """Get initialization progress information.

        Returns:
            A multi-line progress text when the status message mentions
            "initializing", otherwise an empty string.
        """
        status = self.get_endpoint_status()

        # NOTE(review): replica id and start time are a hard-coded snapshot
        # copied from endpoint logs, not live data.
        if self._is_initializing(status):
            return (
                "\n"
                "🚀 HF Endpoint Initialization in Progress:\n"
                "- Replica: UIVI6 downloading\n"
                "- Started: Sep 09, 22:15:24\n"
                "- Status: Logs not yet available\n"
                "- ETA: 2-4 minutes\n"
                "\n"
                "Please wait for initialization to complete before using the endpoint.\n"
            )

        return ""


# Global instance
hf_monitor = HFEndpointMonitor()