|
import logging
import time
from typing import Any, Dict, Optional

import requests

from utils.config import config
|
|
|
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
|
|
class HFEndpointMonitor:
    """Monitor Hugging Face endpoint status and health.

    The status is obtained by sending a GET request to
    ``<endpoint_url>/models`` with the configured bearer token. Results are
    cached for a short time so that repeated calls do not hit the endpoint
    every time.

    Every status is a dict with the keys ``status``, ``message``,
    ``available``, ``initializing`` and ``detailed``.
    """

    # Seconds a cached status stays fresh, measured from ``last_check``.
    CACHE_TTL_SECONDS = 60
    # Timeout, in seconds, for the ``/models`` status probe.
    STATUS_TIMEOUT_SECONDS = 10
    # Timeout, in seconds, for the wake-up chat completion request.
    WAKE_UP_TIMEOUT_SECONDS = 60
    # Model name sent in the wake-up chat completion payload.
    WAKE_UP_MODEL = "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
    # Summary line shown while the endpoint is flagged as initializing.
    INITIALIZING_MESSAGE = "⏳ HF Endpoint: Initializing (may take 4 minutes)"

    def __init__(self):
        """Read the endpoint URL and token from ``config`` and reset the cache."""
        self.endpoint_url = config.hf_api_url.rstrip('/') if config.hf_api_url else ""
        self.hf_token = config.hf_token
        # Timestamp (``time.time()``) of the last non-cached status request.
        self.last_check = 0
        # Maximum age, in seconds, of the last real probe for the cache to be used.
        self.check_interval = 300
        self._cached_status: Optional[Dict[str, Any]] = None
        # Timestamp (``time.time()``) of the last probe that contacted the endpoint.
        self._last_detailed_check = 0

    @staticmethod
    def _build_status(
        status: str,
        message: str,
        *,
        available: bool = False,
        initializing: bool = False,
        detailed: bool = True,
    ) -> Dict[str, Any]:
        """Build a status dict with the standard five keys."""
        return {
            "status": status,
            "message": message,
            "available": available,
            "initializing": initializing,
            "detailed": detailed,
        }

    @staticmethod
    def _is_initializing(status: Dict[str, Any]) -> bool:
        """Return True when the status is flagged as initializing.

        The explicit ``initializing`` flag is used rather than searching the
        message text, so an error whose text happens to contain the word
        "initializing" is not misreported.
        """
        return bool(status.get("initializing"))

    def _probe_endpoint(self) -> Dict[str, Any]:
        """Query ``<endpoint_url>/models`` and translate the outcome to a status."""
        headers = {"Authorization": f"Bearer {self.hf_token}"}
        models_url = f"{self.endpoint_url}/models"

        try:
            response = requests.get(
                models_url,
                headers=headers,
                timeout=self.STATUS_TIMEOUT_SECONDS,
            )
        except requests.exceptions.Timeout:
            return self._build_status(
                "timeout",
                "HF endpoint timeout (may be initializing)",
                initializing=True,
            )
        except Exception as e:
            # Best-effort probe: report the failure as a status instead of raising.
            logger.warning("HF endpoint status check failed: %s", e)
            return self._build_status("error", f"HF endpoint error: {str(e)}")

        code = response.status_code
        if code in (200, 201):
            return self._build_status(
                "available", "HF endpoint is ready", available=True
            )
        if code == 503:
            return self._build_status(
                "scaled_to_zero", "HF endpoint is scaled to zero"
            )
        return self._build_status("error", f"HF endpoint error: {code}")

    def get_endpoint_status(self) -> Dict[str, Any]:
        """Get current HF endpoint status.

        Returns:
            A status dict (see the class docstring). The dict is a copy, so
            callers may modify it without affecting the cached value.
        """
        current_time = time.time()

        # Serve the cached value only while both freshness windows hold.
        if (
            self._cached_status
            and current_time - self.last_check < self.CACHE_TTL_SECONDS
            and current_time - self._last_detailed_check < self.check_interval
        ):
            return dict(self._cached_status)

        self.last_check = current_time

        if not self.endpoint_url or not self.hf_token:
            status = self._build_status(
                "not_configured", "HF endpoint not configured", detailed=False
            )
            self._cached_status = status
            return dict(status)

        status = self._probe_endpoint()
        self._cached_status = status
        self._last_detailed_check = current_time
        return dict(status)

    def get_human_readable_status(self) -> str:
        """Get a one-line, human-readable status message."""
        status = self.get_endpoint_status()

        if self._is_initializing(status):
            return self.INITIALIZING_MESSAGE

        status_messages = {
            "not_configured": "🟡 HF Endpoint: Not configured",
            "available": "🟢 HF Endpoint: Available and ready",
            "scaled_to_zero": "🔴 HF Endpoint: Scaled to zero (send message to wake up)",
            "timeout": self.INITIALIZING_MESSAGE,
            "error": f"❌ HF Endpoint: Error - {status.get('message', 'Unknown error')}",
        }

        return status_messages.get(
            status["status"],
            f"⚪ HF Endpoint: {status.get('message', 'Unknown status')}",
        )

    def get_detailed_status(self) -> Dict[str, Any]:
        """Get detailed status information.

        Returns:
            The current status dict. While the endpoint is flagged as
            initializing, the keys ``details``, ``eta`` and ``action`` are
            added. The cached status is never modified.
        """
        # get_endpoint_status() returns a copy, so updating it is safe.
        status = self.get_endpoint_status()

        if self._is_initializing(status):
            status.update({
                "details": "Status probe timed out; the endpoint may still be starting up",
                "eta": "Initialization may take 2-4 minutes",
                "action": "Please wait for initialization to complete",
            })

        return status

    def attempt_wake_up(self) -> bool:
        """Attempt to wake up the HF endpoint.

        Sends a minimal chat completion request to
        ``<endpoint_url>/chat/completions``.

        Returns:
            True when the request returned HTTP 200 or 201. False when the
            endpoint is not configured, the request failed, or any other
            status code was returned.
        """
        if not self.endpoint_url or not self.hf_token:
            return False

        headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.WAKE_UP_MODEL,
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 10,
            "stream": False,
        }
        chat_url = f"{self.endpoint_url}/chat/completions"

        try:
            response = requests.post(
                chat_url,
                headers=headers,
                json=payload,
                timeout=self.WAKE_UP_TIMEOUT_SECONDS,
            )
        except Exception as e:
            logger.warning("Failed to wake up HF endpoint: %s", e)
            return False

        woke_up = response.status_code in (200, 201)
        if woke_up:
            # Drop the cached status so the next check probes the endpoint again.
            self._cached_status = None
        return woke_up

    def get_initialization_progress(self) -> str:
        """Get initialization progress information.

        Returns:
            A multi-line notice while the endpoint is flagged as initializing,
            otherwise an empty string.
        """
        status = self.get_endpoint_status()
        if not self._is_initializing(status):
            return ""

        return (
            "\n"
            "🚀 HF Endpoint Initialization in Progress:\n"
            "- Status: Status probe timed out; the endpoint may still be starting\n"
            "- ETA: 2-4 minutes\n"
            "\n"
            "Please wait for initialization to complete before using the endpoint.\n"
        )
|
|
|
|
|
# Module-level monitor instance, created when this module is imported.
hf_monitor = HFEndpointMonitor()
|
|