AI-Life-Coach-Streamlit2

Running

File size: 6,903 Bytes

import requests
import time
import logging
from typing import Dict
from utils.config import config

# Module-level logger named after this module; used to report wake-up failures.
logger = logging.getLogger(__name__)

class HFEndpointMonitor:
    """Monitor Hugging Face endpoint status and health.

    The monitor probes ``<endpoint_url>/models`` with the configured bearer
    token and classifies the result into a small status dict with the keys
    ``status``, ``message``, ``available``, ``initializing`` and ``detailed``.
    Results are cached briefly so that repeated UI refreshes do not hit the
    endpoint on every call.
    """

    # Seconds a cached status may be served without re-probing the endpoint.
    QUICK_CACHE_SECONDS = 60
    # Timeout (seconds) for the lightweight status probe.
    STATUS_TIMEOUT_SECONDS = 10
    # Timeout (seconds) for the wake-up chat request, which may be slow.
    WAKE_UP_TIMEOUT_SECONDS = 60
    # Model name sent in the minimal wake-up chat request.
    WAKE_UP_MODEL = "DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"

    def __init__(self):
        """Read the endpoint URL and token from ``config``; start with no cache."""
        self.endpoint_url = config.hf_api_url.rstrip('/') if config.hf_api_url else ""
        self.hf_token = config.hf_token
        self.last_check = 0
        # Maximum age (seconds) of the last network probe before the cache
        # is considered stale regardless of ``last_check``.
        self.check_interval = 300  # 5 minutes
        self._cached_status = None
        self._last_detailed_check = 0

    @staticmethod
    def _make_status(status, message, available=False, initializing=False,
                     detailed=True) -> Dict:
        """Build a status dict with the fixed set of keys callers expect."""
        return {
            "status": status,
            "message": message,
            "available": available,
            "initializing": initializing,
            "detailed": detailed,
        }

    def get_endpoint_status(self) -> Dict:
        """Get current HF endpoint status.

        Returns:
            A new dict (safe for the caller to mutate) with the keys
            ``status`` (one of ``not_configured``, ``available``,
            ``scaled_to_zero``, ``timeout``, ``error``), ``message``,
            ``available``, ``initializing`` and ``detailed``.
        """
        current_time = time.time()

        # Serve the cached result only while both the last call and the last
        # network probe are recent enough.
        cache_is_fresh = (
            self._cached_status
            and current_time - self.last_check < self.QUICK_CACHE_SECONDS
            and current_time - self._last_detailed_check < self.check_interval
        )
        if cache_is_fresh:
            # Hand out a copy so callers cannot alter the cached entry.
            return dict(self._cached_status)

        self.last_check = current_time

        # Without a URL and token there is nothing to probe. The probe
        # timestamp is left untouched here, so this cheap branch is
        # re-evaluated on every call rather than served from the cache.
        if not self.endpoint_url or not self.hf_token:
            status = self._make_status(
                "not_configured",
                "HF endpoint not configured",
                detailed=False,
            )
            self._cached_status = status
            return dict(status)

        try:
            response = requests.get(
                f"{self.endpoint_url}/models",
                headers={"Authorization": f"Bearer {self.hf_token}"},
                timeout=self.STATUS_TIMEOUT_SECONDS,  # keep the UI responsive
            )

            if response.status_code in (200, 201):
                status = self._make_status(
                    "available", "HF endpoint is ready", available=True
                )
            elif response.status_code == 503:
                status = self._make_status(
                    "scaled_to_zero", "HF endpoint is scaled to zero"
                )
            else:
                status = self._make_status(
                    "error", f"HF endpoint error: {response.status_code}"
                )

        except requests.exceptions.Timeout:
            # A timeout is treated as "probably still starting up".
            status = self._make_status(
                "timeout",
                "HF endpoint timeout (may be initializing)",
                initializing=True,
            )
        except Exception as e:
            # Best effort: any other failure is reported as an error status
            # instead of propagating into the caller.
            status = self._make_status("error", f"HF endpoint error: {str(e)}")

        self._cached_status = status
        self._last_detailed_check = current_time
        return dict(status)

    def get_human_readable_status(self) -> str:
        """Get human-readable status message.

        Returns:
            A one-line, emoji-prefixed description of the current status.
            Unknown status codes fall back to the raw status message.
        """
        status = self.get_endpoint_status()

        status_messages = {
            "not_configured": "🟡 HF Endpoint: Not configured",
            "available": "🟢 HF Endpoint: Available and ready",
            "scaled_to_zero": "🔴 HF Endpoint: Scaled to zero (send message to wake up)",
            "timeout": "⏳ HF Endpoint: Initializing (may take 4 minutes)",
            "error": f"❌ HF Endpoint: Error - {status.get('message', 'Unknown error')}"
        }

        return status_messages.get(
            status["status"],
            f"⚪ HF Endpoint: {status.get('message', 'Unknown status')}",
        )

    def get_detailed_status(self) -> Dict:
        """Get detailed status information.

        Returns:
            The status dict from :meth:`get_endpoint_status`; when the
            endpoint appears to be initializing, the extra keys ``details``,
            ``eta`` and ``action`` are added. The cached status is never
            modified.
        """
        # get_endpoint_status() returns a fresh copy, so updating it here
        # does not leak the extra keys into the cache.
        status = self.get_endpoint_status()

        if status.get("initializing"):
            status.update({
                "details": "Endpoint did not respond in time; a replica is probably still starting",
                "eta": "Initialization may take 2-4 minutes",
                "action": "Please wait for initialization to complete",
            })

        return status

    def attempt_wake_up(self) -> bool:
        """Attempt to wake up the HF endpoint.

        Sends a minimal chat completion request to
        ``<endpoint_url>/chat/completions``.

        Returns:
            ``True`` if the endpoint answered with HTTP 200/201, ``False`` if
            it is not configured, answered with another code, or the request
            failed.
        """
        if not self.endpoint_url or not self.hf_token:
            return False

        headers = {
            "Authorization": f"Bearer {self.hf_token}",
            "Content-Type": "application/json"
        }

        # Smallest useful request: one short message, few tokens, no stream.
        payload = {
            "model": self.WAKE_UP_MODEL,
            "messages": [{"role": "user", "content": "Hello"}],
            "max_tokens": 10,
            "stream": False
        }

        try:
            response = requests.post(
                f"{self.endpoint_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=self.WAKE_UP_TIMEOUT_SECONDS,
            )
        except Exception as e:
            logger.warning("Failed to wake up HF endpoint: %s", e)
            return False

        woke_up = response.status_code in (200, 201)
        if woke_up:
            # Any cached "scaled to zero"/"timeout" status is now stale;
            # force the next status call to re-probe the endpoint.
            self._cached_status = None
        return woke_up

    def get_initialization_progress(self) -> str:
        """Get initialization progress information.

        Returns:
            A multi-line notice while the endpoint appears to be
            initializing, otherwise an empty string.
        """
        status = self.get_endpoint_status()
        if not status.get("initializing"):
            return ""

        return """
            🚀 HF Endpoint Initialization in Progress:
            - Status: Endpoint not responding yet (replica is probably still starting)
            - ETA: 2-4 minutes

            Please wait for initialization to complete before using the endpoint.
            """

# Global instance: shared monitor created when this module is imported. Its
# constructor reads `config.hf_api_url` and `config.hf_token` at that moment.
hf_monitor: HFEndpointMonitor = HFEndpointMonitor()