import gradio as gr
from openai import OpenAI
import os

# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# Initialize the OpenAI API client against the Hugging Face Inference endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
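# Note: the base_url above is Hugging Face's OpenAI-compatible serverless
# Inference endpoint, so the standard openai client works unchanged. HF_TOKEN
# must be set (as a Space secret, or in your local environment) or requests
# will fail to authenticate.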
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
):
    # Log the incoming request for debugging
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System Message: {system_message}")
    print(f"Max Tokens: {max_tokens}, Temperature: {temperature}, Top P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")

    # Convert seed to None if -1, so the API picks a random seed
    if seed == -1:
        seed = None

    # Construct the messages list for the API
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_message, assistant_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
            print(f"Added user message: {user_message}")
        if assistant_message:
            messages.append({"role": "assistant", "content": assistant_message})
            print(f"Added assistant message: {assistant_message}")

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # Accumulate the streamed reply, yielding the partial response as it grows
    response = ""
    for chunk in client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        stream=True,
    ):
        # Streamed chunks carry the token text in .delta, not .message,
        # and the content can be None on some chunks
        token = chunk.choices[0].delta.content
        if token is not None:
            response += token
            yield response
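# Illustrative sketch (not executed): respond() is a generator, and each yield
# is the accumulated reply so far, which is what lets Gradio stream the answer:
#   for partial in respond("Hello!", [], "You are helpful.", 100, 0.7, 0.95, 0.0, -1):
#       print(partial)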
# Create the Gradio Chatbot component
chatbot = gr.Chatbot(height=600)

# Define the Gradio ChatInterface. ChatInterface creates the message textbox
# and manages the conversation history itself, so only the extra parameters
# are passed via additional_inputs, in the order respond() expects them.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chatbot,
    additional_inputs=[
        gr.Textbox(label="System Message"),
        gr.Slider(minimum=10, maximum=200, step=1, label="Max Tokens"),
        gr.Slider(minimum=0, maximum=2, step=0.1, label="Temperature"),
        gr.Slider(minimum=0, maximum=1, step=0.05, label="Top P"),
        gr.Slider(minimum=-2, maximum=2, step=0.1, label="Frequency Penalty"),
        gr.Slider(minimum=-1, maximum=1000000, step=1, label="Seed (-1 for random)"),
    ],
    theme="Nymbo/Nymbo_Theme",
)
# Add the model-selection controls inside the ChatInterface's Blocks context,
# so they render as part of the same demo
with demo:
    # Create the "Featured Models" accordion
    with gr.Accordion("Featured Models", open=True):
        # Textbox for filtering the model list
        model_search = gr.Textbox(label="Filter Models")
        # List of featured models
        models = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-2-70B-Chat-hf",
            "TheBloke/Llama-2-13B-Chat-GGML",
            "TheBloke/Llama-2-70B-Chat-GGML",
            "TheBloke/Llama-2-13B-Chat-GGML-v2",
            "TheBloke/Llama-2-70B-Chat-GGML-v2",
            "TheBloke/Llama-2-70B-Chat-HF-API-compatible-GGML",
            "TheBloke/Llama-2-70b-chat-hf",
            "TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
            "TheBloke/Llama-2-13B-Chat-GGML-v2-32K",
            # Add more models as needed...
        ]
        # Radio buttons for selecting a model
        model_radio = gr.Radio(choices=models, label="Select a Model")

        # Return only the models whose names contain the search term
        def filter_models(search_term):
            filtered_models = [m for m in models if search_term.lower() in m.lower()]
            return gr.update(choices=filtered_models)

        # Update the model list when the search box is used
        model_search.change(filter_models, inputs=model_search, outputs=model_radio)
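        # For example, typing "GGML" in the filter box narrows the radio
        # choices to TheBloke's GGML conversions in the list above.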
    # Create a "Custom Model" textbox
    custom_model = gr.Textbox(label="Custom Model", placeholder="Hugging Face model path")
    # Create the "Information" tab
    with gr.Tab("Information"):
        # Featured Models accordion
        with gr.Accordion("Featured Models", open=False):
            gr.Markdown(
                """
                # Featured Models

                Here's a list of some popular models available on Hugging Face:

                - meta-llama/Llama-3.3-70B-Instruct
                - meta-llama/Llama-2-70B-Chat-hf
                - TheBloke/Llama-2-13B-Chat-GGML
                - TheBloke/Llama-2-70B-Chat-GGML
                - TheBloke/Llama-2-13B-Chat-GGML-v2
                - TheBloke/Llama-2-70B-Chat-GGML-v2
                - ... (and many more)

                You can search and select a model from the list above, or use your own custom model path.
                """
            )
        # Parameters Overview accordion
        with gr.Accordion("Parameters Overview", open=False):
            gr.Markdown(
                """
                # Parameters Overview

                Here's a brief explanation of the parameters you can adjust:

                - **Max Tokens**: The maximum number of tokens to generate in the response.
                - **Temperature**: Controls the randomness of the output. Higher values make the output more random.
                - **Top P**: Also known as nucleus sampling; sampling is limited to the smallest set of tokens whose cumulative probability exceeds P, cutting off the least likely tokens.
                - **Frequency Penalty**: Penalizes tokens in proportion to how often they have already appeared, reducing repetition.
                - **Seed**: A fixed seed for reproducibility. Use -1 for a random seed.

                Feel free to experiment with these settings to achieve the desired output.
                """
            )
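# Illustrative sketch (not executed) of how the parameters described above map
# onto a raw chat-completion request; the values shown are arbitrary examples:
#   client.chat.completions.create(
#       model="meta-llama/Llama-3.3-70B-Instruct",
#       messages=[{"role": "user", "content": "Hi"}],
#       max_tokens=100,         # Max Tokens
#       temperature=0.7,        # Temperature
#       top_p=0.95,             # Top P
#       frequency_penalty=0.0,  # Frequency Penalty
#       seed=42,                # fixed Seed for reproducible sampling
#   )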
# Launch the Gradio interface
demo.launch(share=True)
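# To run locally (illustrative):
#   export HF_TOKEN=<your Hugging Face access token>
#   python app.py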