# modules/analyzer.py
from openai import OpenAI
import os
import time
# Use your existing Hugging Face endpoint
client = OpenAI(
    base_url="https://zxzbfrlg3ssrk7d9.us-east-1.aws.endpoints.huggingface.cloud/v1/",
    api_key=os.getenv("HF_TOKEN"),
)
def analyze_with_model(prompt):
    """Analyze a prompt with the LLM, yielding response chunks for streaming."""
    try:
        # Use the Hugging Face Inference API with proper streaming
        response = client.chat.completions.create(
            model="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf",
            messages=[{"role": "user", "content": prompt}],
            stream=True,  # Enable streaming for real-time responses
            temperature=0.7,
            max_tokens=8192,  # Increased token limit
            timeout=120,  # Increased timeout for longer responses
        )
        # Stream the response chunks as they arrive
        for chunk in response:
            # Some streams include keep-alive chunks with no choices; skip them
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content:
                yield content
                time.sleep(0.01)  # Smooth out the stream
    except Exception as e:
        error_msg = str(e)
        # Enhanced error detection for common Hugging Face issues
        if "503" in error_msg:
            yield f"Error during analysis: Service temporarily unavailable (503). The model server is likely initializing. Please wait 5 minutes and try again. Details: {error_msg}"
        elif "timeout" in error_msg.lower():
            yield f"Error during analysis: Request timed out. The model server may be initializing. Please wait 5 minutes and try again. Details: {error_msg}"
        elif "connection" in error_msg.lower():
            yield f"Error during analysis: Connection error. The model server may be initializing. Please wait 5 minutes and try again. Details: {error_msg}"
        elif "limit" in error_msg.lower():
            yield f"Error during analysis: Rate limit exceeded. Please wait a moment and try again. Details: {error_msg}"
        else:
            yield f"Error during analysis: {error_msg}"