from openai import OpenAI
import os
import time

# Use your existing Hugging Face endpoint
client = OpenAI(
    base_url="https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key=os.getenv("HF_TOKEN")
)
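
# Optional sanity check (an illustrative sketch, not part of the original script):
# fail fast if HF_TOKEN is missing rather than getting an opaque 401 at request time.
if not os.getenv("HF_TOKEN"):
    raise RuntimeError("HF_TOKEN environment variable is not set")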


def analyze_with_model(prompt):
    """Analyze a prompt with the LLM, yielding response text chunks for streaming."""
    try:
        # Call the Hugging Face endpoint via the OpenAI-compatible API with streaming
        response = client.chat.completions.create(
            model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
            messages=[{"role": "user", "content": prompt}],
            stream=True,      # Enable streaming for real-time responses
            temperature=0.7,
            max_tokens=8192,  # Increased token limit
            timeout=120       # Increased per-request timeout for longer responses
        )
        # Stream the response chunks as they arrive
        for chunk in response:
            # Some chunks (e.g., the final one) may carry no choices or no content
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                yield content
                time.sleep(0.01)  # Smooth out the stream
    except Exception as e:
        error_msg = str(e)
        # Map common Hugging Face endpoint failures to actionable messages
        if "503" in error_msg:
            yield f"Error during analysis: Service temporarily unavailable (503). The model server is likely initializing. Please wait 5 minutes and try again. Details: {error_msg}"
        elif "timeout" in error_msg.lower():
            yield f"Error during analysis: Request timed out. The model server may be initializing. Please wait 5 minutes and try again. Details: {error_msg}"
        elif "connection" in error_msg.lower():
            yield f"Error during analysis: Connection error. The model server may be initializing. Please wait 5 minutes and try again. Details: {error_msg}"
        elif "limit" in error_msg.lower():
            yield f"Error during analysis: Rate limit exceeded. Please wait a moment and try again. Details: {error_msg}"
        else:
            yield f"Error during analysis: {error_msg}"