# Page header captured with this file: "rdune71's picture"
# Commit message: Enhance HF endpoint monitoring with initialization status and better user guidance
# Commit: 8cfe660
import requests
import time
import logging
from typing import Dict
from utils.config import config
# Module-level logger named after this module (used for wake-up failure warnings).
logger = logging.getLogger(__name__)
class HFEndpointMonitor:
    """Monitor Hugging Face endpoint status and health.

    Every status dictionary produced by this class carries the keys
    ``status``, ``message``, ``available``, ``initializing`` and ``detailed``.
    """

    # Seconds during which a cached status is served without re-probing.
    QUICK_CHECK_INTERVAL = 60
    # Timeout (seconds) for the lightweight ``/models`` status probe.
    STATUS_TIMEOUT = 10
    # Timeout (seconds) for the wake-up chat completion request.
    WAKE_UP_TIMEOUT = 60
    # Model name sent in the wake-up request when the caller gives none.
    DEFAULT_WAKE_UP_MODEL = (
        "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
    )

    def __init__(self):
        """Read the endpoint URL and token from ``config`` and reset the cache."""
        self.endpoint_url = config.hf_api_url.rstrip('/') if config.hf_api_url else ""
        self.hf_token = config.hf_token
        self.last_check = 0
        self.check_interval = 300  # 5 minutes: max age of the last real probe
        self._cached_status = None
        self._last_detailed_check = 0

    @staticmethod
    def _build_status(status, message, *, available=False,
                      initializing=False, detailed=True) -> Dict:
        """Build a status dictionary with the standard set of keys."""
        return {
            "status": status,
            "message": message,
            "available": available,
            "initializing": initializing,
            "detailed": detailed,
        }

    def get_endpoint_status(self) -> Dict:
        """Get current HF endpoint status.

        A cached result is returned when the last check is younger than
        ``QUICK_CHECK_INTERVAL`` seconds *and* the last real probe is younger
        than ``check_interval`` seconds. Otherwise the endpoint's ``/models``
        URL is requested and the outcome is cached.

        Returns:
            A status dictionary whose ``status`` is one of
            ``not_configured``, ``available``, ``scaled_to_zero``,
            ``timeout`` or ``error``. This method does not raise: request
            failures are reported through the returned dictionary.
        """
        current_time = time.time()
        cache_is_fresh = (
            current_time - self.last_check < self.QUICK_CHECK_INTERVAL
            and current_time - self._last_detailed_check < self.check_interval
        )
        if self._cached_status and cache_is_fresh:
            return self._cached_status

        self.last_check = current_time

        # Without a URL and a token there is nothing to probe.
        if not self.endpoint_url or not self.hf_token:
            status = self._build_status(
                "not_configured",
                "HF endpoint not configured",
                detailed=False,
            )
            self._cached_status = status
            return status

        try:
            # Short timeout so the caller gets an answer quickly.
            response = requests.get(
                f"{self.endpoint_url}/models",
                headers={"Authorization": f"Bearer {self.hf_token}"},
                timeout=self.STATUS_TIMEOUT,
            )
        except requests.exceptions.Timeout:
            # No answer in time is treated as "possibly still starting".
            status = self._build_status(
                "timeout",
                "HF endpoint timeout (may be initializing)",
                initializing=True,
            )
        except Exception as e:
            # Best-effort monitor: any other failure becomes an error status
            # instead of propagating to the caller.
            status = self._build_status("error", f"HF endpoint error: {str(e)}")
        else:
            if response.status_code in (200, 201):
                status = self._build_status(
                    "available", "HF endpoint is ready", available=True
                )
            elif response.status_code == 503:
                status = self._build_status(
                    "scaled_to_zero", "HF endpoint is scaled to zero"
                )
            else:
                status = self._build_status(
                    "error", f"HF endpoint error: {response.status_code}"
                )

        self._cached_status = status
        self._last_detailed_check = current_time
        return status

    def get_human_readable_status(self) -> str:
        """Get human-readable status message.

        Returns:
            A one-line, emoji-prefixed description of the current status.
            Unknown status codes fall back to the raw status message.
        """
        status = self.get_endpoint_status()
        status_messages = {
            "not_configured": "🟡 HF Endpoint: Not configured",
            "available": "🟢 HF Endpoint: Available and ready",
            "scaled_to_zero": "🔴 HF Endpoint: Scaled to zero (send message to wake up)",
            "timeout": "⏳ HF Endpoint: Initializing (may take 4 minutes)",
            "error": f"❌ HF Endpoint: Error - {status.get('message', 'Unknown error')}"
        }
        return status_messages.get(
            status["status"],
            f"⚪ HF Endpoint: {status.get('message', 'Unknown status')}",
        )

    def get_detailed_status(self) -> Dict:
        """Get detailed status information.

        Returns:
            A copy of the current status dictionary. When the endpoint is
            flagged as initializing, the copy additionally contains the keys
            ``details``, ``eta`` and ``action``.
        """
        # Work on a copy so the extra keys never leak into the cached status.
        status = dict(self.get_endpoint_status())
        if status.get("initializing"):
            status.update({
                "details": "Status check timed out; the endpoint may still be starting",
                "eta": "Initialization may take 2-4 minutes",
                "action": "Please wait for initialization to complete"
            })
        return status

    def attempt_wake_up(self, model: str = DEFAULT_WAKE_UP_MODEL) -> bool:
        """Attempt to wake up the HF endpoint.

        Sends a minimal, non-streaming chat completion request to
        ``<endpoint_url>/chat/completions``.

        Args:
            model: Model name placed in the request payload.

        Returns:
            True when the endpoint answered with HTTP 200 or 201; False when
            the monitor is not configured, another status code came back, or
            the request raised (the failure is logged as a warning).
        """
        if not self.endpoint_url or not self.hf_token:
            return False

        headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/json"
        }
        # Smallest useful request: one short message, tiny completion.
        payload = {
            "model": model,
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 10,
            "stream": False
        }
        try:
            response = requests.post(
                f"{self.endpoint_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=self.WAKE_UP_TIMEOUT,  # longer than the status probe
            )
        except Exception as e:
            logger.warning("Failed to wake up HF endpoint: %s", e)
            return False

        woke_up = response.status_code in (200, 201)
        if woke_up:
            # The cached status predates the wake-up; force a fresh probe.
            self._cached_status = None
        return woke_up

    def get_initialization_progress(self) -> str:
        """Get initialization progress information.

        Returns:
            A multi-line notice when the current status is flagged as
            initializing, otherwise an empty string.
        """
        status = self.get_endpoint_status()
        if not status.get("initializing"):
            return ""
        return (
            "\n"
            "🚀 HF Endpoint Initialization in Progress:\n"
            "- Status: Status check timed out; the endpoint may still be starting\n"
            "- ETA: 2-4 minutes\n"
            "Please wait for initialization to complete before using the endpoint.\n"
        )
# Global instance, constructed when this module is imported
# (presumably shared by callers that import `hf_monitor` - confirm against usage).
hf_monitor = HFEndpointMonitor()