myspace134v

Running

File size: 2,047 Bytes

742b2a5
ab6d29f
001a1f0
ab6d29f
03da349
ab6d29f
 
 
 
 
 
001a1f0
ab6d29f
03da349
ab6d29f
 
 
03da349
ab6d29f
001a1f0
 
742b2a5
001a1f0
03da349
001a1f0
 
 
 
 
 
ab6d29f
001a1f0
03da349
001a1f0
 
 
 
 
 
03da349
 
001a1f0

from openai import OpenAI
import os
import time

# Shared client: the OpenAI SDK pointed at the Hugging Face endpoint's /v1/
# route, authenticated with the token read from the HF_TOKEN environment variable.
client = OpenAI(
    api_key=os.environ.get("HF_TOKEN"),
    base_url="https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
)

def analyze_with_model(prompt):
    """Stream the model's reply to ``prompt`` as it is generated.

    Sends ``prompt`` as a single user message through the module-level
    ``client`` with streaming enabled and yields each non-empty text fragment
    in arrival order.

    Failures never propagate to the caller: any exception raised while
    creating or reading the stream is converted into one final yielded string
    that starts with ``"Error during analysis:"``.

    Args:
        prompt: Text sent as the content of the user message.

    Yields:
        Non-empty pieces of the reply, or a single error message string.
    """
    response = None
    try:
        response = client.chat.completions.create(
            model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
            messages=[{"role": "user", "content": prompt}],
            stream=True,  # Enable streaming for real-time responses
            temperature=0.7,
            max_tokens=8192,  # Increased token limit
            timeout=120,  # Increased timeout for longer responses
        )

        for chunk in response:
            # A chunk may arrive with an empty ``choices`` list (for instance a
            # usage-only chunk). Indexing it would raise IndexError and end an
            # otherwise successful stream with a bogus error message.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                yield content
            time.sleep(0.01)  # Smooth out the stream

    except Exception as e:
        # Deliberate catch-all boundary: the consumer only ever receives text,
        # so every failure is reported in-band as the last yielded item.
        yield _describe_error(e)
    finally:
        # Release the underlying connection even when the consumer stops
        # iterating early (the generator is closed or garbage-collected).
        close = getattr(response, "close", None)
        if callable(close):
            close()


def _describe_error(exc):
    """Map an exception from the request/stream to the user-facing message."""
    error_msg = str(exc)
    lowered = error_msg.lower()
    # HTTP status errors raised by the SDK expose ``status_code``; prefer it
    # over guessing from the message text when it is available.
    status = getattr(exc, "status_code", None)

    if status == 503 or "503" in error_msg:
        return f"Error during analysis: Service temporarily unavailable (503). The model server is likely initializing. Please wait 5 minutes and try again. Details: {error_msg}"
    if status == 429:
        return f"Error during analysis: Rate limit exceeded. Please wait a moment and try again. Details: {error_msg}"
    # Timeout errors are commonly worded "timed out" rather than "timeout".
    if isinstance(exc, TimeoutError) or "timeout" in lowered or "timed out" in lowered:
        return f"Error during analysis: Request timed out. The model server may be initializing. Please wait 5 minutes and try again. Details: {error_msg}"
    if "connection" in lowered:
        return f"Error during analysis: Connection error. The model server may be initializing. Please wait 5 minutes and try again. Details: {error_msg}"
    if "limit" in lowered:
        return f"Error during analysis: Rate limit exceeded. Please wait a moment and try again. Details: {error_msg}"
    return f"Error during analysis: {error_msg}"