# builder / hf_client.py
# mgbam: "Update hf_client.py" (commit 5d6f898, verified)
"""
This module handles the creation of API clients for the application.
It includes the logic for instantiating the Hugging Face InferenceClient
and the Tavily Search client.
The get_inference_client function is critical for enabling the "user-pays"
model in a Hugging Face Space. It prioritizes the API token of the logged-in
user, ensuring their account is billed for inference costs.
"""
import os
from typing import Optional
from huggingface_hub import InferenceClient
from tavily import TavilyClient
# --- Hugging Face Inference Client ---
# Token belonging to the Space owner, read once at import time from the
# environment (Space secrets). It is only a fallback: it is used for local
# development or whenever no user-provided token is passed in.
HF_TOKEN = os.environ.get('HF_TOKEN')
def _clean_token(token: Optional[str]) -> Optional[str]:
    """Return `token` without surrounding whitespace, or None if it is missing or blank."""
    # Only strings are stripped; any other value is passed through untouched so
    # that callers handing in something unexpected see the same behavior as before.
    if isinstance(token, str):
        token = token.strip()
    return token or None


def get_inference_client(model_id: str, provider: str = "auto", user_token: Optional[str] = None) -> InferenceClient:
    """
    Creates and returns a Hugging Face InferenceClient.

    This function implements the "user-pays" logic. It prioritizes using the token
    provided by the logged-in user (`user_token`). If that is not available,
    it falls back to the Space owner's token (`HF_TOKEN`).

    Tokens are stripped of surrounding whitespace before use, and a token that is
    empty or whitespace-only is treated as missing. This keeps a blank
    `user_token` from shadowing a valid `HF_TOKEN`, and keeps a stray space or
    newline from being sent as part of the credential.

    Args:
        model_id (str): The ID of the model to be used (e.g., "mistralai/Mistral-7B-Instruct-v0.2").
            It is only used here to pick a provider override; it is not bound to
            the returned client.
        provider (str): The specific inference provider (e.g., "groq"). Defaults to "auto".
            Ignored for models that are pinned to a provider below.
        user_token (Optional[str]): The API token of the logged-in user, passed from the Gradio app.

    Returns:
        InferenceClient: An initialized client ready for making API calls.

    Raises:
        ValueError: If no API token can be found (neither from the user nor the environment).
    """
    # 1. Determine which token to use for the API call. The user's token takes
    #    precedence; blank values do not count as a token.
    token_to_use = _clean_token(user_token) or _clean_token(HF_TOKEN)

    # 2. Validate that we have a token. If not, the application cannot make API calls.
    if not token_to_use:
        raise ValueError(
            "Cannot proceed without an API token. "
            "Please log into Hugging Face, or ensure the HF_TOKEN environment secret is set for this Space."
        )

    # 3. Handle any model-specific provider logic. This model is always routed
    #    to "groq", overriding whatever provider the caller asked for.
    if model_id == "moonshotai/Kimi-K2-Instruct":
        provider = "groq"

    # 4. Instantiate and return the client.
    # The Hugging Face Hub automatically bills the account associated with the provided `api_key`.
    # The `bill_to` parameter is NOT needed or used for this user-pays scenario.
    return InferenceClient(
        provider=provider,
        api_key=token_to_use,
    )
# --- Tavily Search Client ---
# Web search is a backend service, so it runs on the Space owner's
# TAVILY_API_KEY rather than on a per-user credential.
TAVILY_API_KEY = os.environ.get('TAVILY_API_KEY')


def _init_tavily_client(api_key):
    """Build a TavilyClient for `api_key`; return None if construction fails."""
    try:
        return TavilyClient(api_key=api_key)
    except Exception as init_error:
        # Best effort: report the failure, but let the app start without web search.
        print(f"Warning: Failed to initialize Tavily client. Web search will be unavailable. Error: {init_error}")
        return None


# Stays None when no key is configured or when the client could not be created.
tavily_client = _init_tavily_client(TAVILY_API_KEY) if TAVILY_API_KEY else None