# inferoxy-hub / tts_handler.py
# feat(handlers): pass Hugging Face profile to handlers (commit 52fc803, verified)
"""
Text-to-speech functionality handler for AI-Inferoxy AI Hub.
Handles text-to-speech generation with multiple providers.
"""
import os
import gradio as gr
import time
import threading
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FutureTimeoutError
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from requests.exceptions import ConnectionError, Timeout, RequestException
from hf_token_utils import get_proxy_token, report_token_status
from utils import (
IMAGE_CONFIG,
validate_proxy_key,
format_error_message,
format_success_message,
TTS_MODEL_CONFIGS,
)
# Timeout configuration for TTS generation
TTS_GENERATION_TIMEOUT = 300 # 5 minutes max for TTS generation
def generate_text_to_speech(
    text: str,
    model_name: str,
    provider: str,
    voice: str = "af_bella",
    speed: float = 1.0,
    audio_url: str = "",
    exaggeration: float = 0.25,
    temperature: float = 0.7,
    cfg: float = 0.5,
    client_name: str | None = None,
):
    """
    Generate speech from text using the specified model and provider through AI-Inferoxy.

    Args:
        text: Text to convert to speech.
        model_name: TTS model identifier; also the key into TTS_MODEL_CONFIGS.
        provider: Inference provider name handed to InferenceClient.
        voice: Voice preset (forwarded only if the model config lists "voice").
        speed: Speech speed multiplier (forwarded only if supported).
        audio_url: Reference audio URL (forwarded only if supported).
        exaggeration: Expressiveness control (forwarded only if supported).
        temperature: Sampling temperature (forwarded only if supported).
        cfg: CFG weight (forwarded only if supported).
        client_name: Optional client identifier reported back to the proxy.

    Returns:
        Tuple of (audio, message): the generated audio on success (None on
        failure) and a formatted success/error message string.
    """
    # Validate proxy API key before doing any network work
    is_valid, error_msg = validate_proxy_key()
    if not is_valid:
        return None, error_msg

    proxy_api_key = os.getenv("PROXY_KEY")
    token_id = None

    try:
        # Get token from AI-Inferoxy proxy server
        print("πŸ”‘ TTS: Requesting token from proxy...")
        token, token_id = get_proxy_token(api_key=proxy_api_key)
        print(f"βœ… TTS: Got token: {token_id}")
        print(f"🎀 TTS: Using model='{model_name}', provider='{provider}', voice='{voice}'")

        # Create client with specified provider
        client = InferenceClient(
            provider=provider,
            api_key=token
        )
        print("πŸš€ TTS: Client created, preparing generation params...")

        # Only forward the extra parameters this model's config declares support for
        model_config = TTS_MODEL_CONFIGS.get(model_name, {})
        extra_body_params = model_config.get("extra_body_params", [])
        candidate_params = {
            "voice": voice,
            "speed": speed,
            "audio_url": audio_url,
            "exaggeration": exaggeration,
            "temperature": temperature,
            "cfg": cfg,
        }
        generation_params = {
            "text": text,
            "model": model_name,
            "extra_body": {
                name: value
                for name, value in candidate_params.items()
                if name in extra_body_params
            },
        }

        print(f"πŸ“‘ TTS: Making generation request with {TTS_GENERATION_TIMEOUT}s timeout...")

        # Execute with timeout using ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=1) as executor:
            future = executor.submit(client.text_to_speech, **generation_params)
            try:
                audio = future.result(timeout=TTS_GENERATION_TIMEOUT)
            except FutureTimeoutError:
                # NOTE: cancel() only prevents a not-yet-started task from
                # running; it cannot interrupt one that is already executing.
                future.cancel()
                raise TimeoutError(f"TTS generation timed out after {TTS_GENERATION_TIMEOUT} seconds")

        print(f"🎡 TTS: Generation completed! Audio type: {type(audio)}")

        # Report successful token usage
        if token_id:
            report_token_status(token_id, "success", api_key=proxy_api_key, client_name=client_name)

        return audio, format_success_message("Speech generated", f"using {model_name} on {provider} with voice {voice}")

    except ConnectionError as e:
        # Handle proxy connection errors
        error_msg = f"Cannot connect to AI-Inferoxy server: {str(e)}"
        print(f"πŸ”Œ TTS connection error: {error_msg}")
        if token_id:
            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key, client_name=client_name)
        return None, format_error_message("Connection Error", "Unable to connect to the proxy server. Please check if it's running.")
    except TimeoutError as e:
        # Handle timeout errors
        error_msg = f"TTS generation timed out: {str(e)}"
        print(f"⏰ TTS timeout: {error_msg}")
        if token_id:
            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key, client_name=client_name)
        return None, format_error_message("Timeout Error", f"TTS generation took too long (>{TTS_GENERATION_TIMEOUT//60} minutes). Try shorter text.")
    except HfHubHTTPError as e:
        # Handle HuggingFace API errors
        error_msg = str(e)
        print(f"πŸ€— TTS HF error: {error_msg}")
        if token_id:
            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key, client_name=client_name)
        # Provide more user-friendly error messages
        if "401" in error_msg:
            return None, format_error_message("Authentication Error", "Invalid or expired API token. The proxy will provide a new token on retry.")
        elif "402" in error_msg:
            return None, format_error_message("Quota Exceeded", "API quota exceeded. The proxy will try alternative providers.")
        elif "429" in error_msg:
            return None, format_error_message("Rate Limited", "Too many requests. Please wait a moment and try again.")
        else:
            return None, format_error_message("HuggingFace API Error", error_msg)
    except Exception as e:
        # Handle all other errors
        error_msg = str(e)
        print(f"❌ TTS unexpected error: {error_msg}")
        if token_id:
            # FIX: pass client_name like every other handler so the proxy
            # can attribute the failure to this client.
            report_token_status(token_id, "error", error_msg, api_key=proxy_api_key, client_name=client_name)
        return None, format_error_message("Unexpected Error", f"An unexpected error occurred: {error_msg}")
def handle_text_to_speech_generation(text_val, model_val, provider_val, voice_val, speed_val, audio_url_val, exaggeration_val, temperature_val, cfg_val, hf_token: gr.OAuthToken = None, hf_profile: gr.OAuthProfile = None):
    """
    Validate a TTS request and delegate to generate_text_to_speech.

    Rejects empty or oversized text, requires a Hugging Face OAuth sign-in,
    and forwards the signed-in username to the proxy as the client name.
    Returns the (audio, message) pair from generate_text_to_speech, or
    (None, error_message) when validation fails.
    """
    # Empty or whitespace-only text is not worth a round trip
    if not text_val or not text_val.strip():
        return None, format_error_message("Validation Error", "Please enter some text to convert to speech")

    # Cap the input size so generation stays within the timeout budget
    if len(text_val) > 5000:
        return None, format_error_message("Validation Error", "Text is too long. Please keep it under 5000 characters.")

    # Pull OAuth credentials; either object may be absent when not signed in
    access_token = None if hf_token is None else getattr(hf_token, "token", None)
    username = None if hf_profile is None else getattr(hf_profile, "username", None)
    if not access_token:
        return None, format_error_message("Access Required", "Please sign in with Hugging Face (sidebar Login button).")

    # All checks passed — hand off to the generator
    return generate_text_to_speech(
        text=text_val.strip(),
        model_name=model_val,
        provider=provider_val,
        voice=voice_val,
        speed=speed_val,
        audio_url=audio_url_val,
        exaggeration=exaggeration_val,
        temperature=temperature_val,
        cfg=cfg_val,
        client_name=username,
    )