import gradio as gr
from openai import OpenAI
import os
# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
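# Optional guard (a small addition, not strictly required): fail fast with a
# clear error if the token is missing, rather than on the first API call
if not ACCESS_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable is not set")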
# Initialize the OpenAI-compatible client, pointed at the Hugging Face
# Inference API
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
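# Minimal smoke test for the client (a sketch; assumes a valid HF_TOKEN and
# network access). Commented out so importing this file never makes a call:
#
# resp = client.chat.completions.create(
#     model="meta-llama/Llama-3.3-70B-Instruct",
#     messages=[{"role": "user", "content": "ping"}],
#     max_tokens=5,
# )
# print(resp.choices[0].message.content)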
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model,
):
    # Log the incoming request for debugging
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System Message: {system_message}")
    print(f"Max Tokens: {max_tokens}, Temperature: {temperature}, Top P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")

    # Convert seed to None if -1 (random)
    if seed == -1:
        seed = None

    # Construct the messages list for the API
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_message, assistant_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
            print(f"Added user message: {user_message}")
        if assistant_message:
            messages.append({"role": "assistant", "content": assistant_message})
            print(f"Added assistant message: {assistant_message}")

    # Append the latest user message
    messages.append({"role": "user", "content": message})
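    # At this point `messages` has the standard OpenAI chat shape, e.g.
    # (illustrative values only):
    # [
    #     {"role": "system", "content": "You are helpful."},
    #     {"role": "user", "content": "Hi"},
    #     {"role": "assistant", "content": "Hello!"},
    #     {"role": "user", "content": "<the new message>"},
    # ]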
    # Use the custom model path if one was provided, otherwise fall back to
    # the default model
    model_to_use = (
        custom_model.strip()
        if custom_model and custom_model.strip()
        else "meta-llama/Llama-3.3-70B-Instruct"
    )

    # Accumulate the streamed response
    response = ""

    # Make the streaming API request
    for chunk in client.chat.completions.create(
        model=model_to_use,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        stream=True,
    ):
        # With stream=True, each chunk carries the token in choices[0].delta
        # (not .message), and the final chunk's delta may have no content
        token = chunk.choices[0].delta.content
        if token is not None:
            response += token
            yield response
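# A quick way to exercise respond() outside the UI (a sketch; assumes a valid
# HF_TOKEN and that the default model is available). Commented out so the app
# makes no call on import:
#
# for partial in respond(
#     "Hello!",             # message
#     [],                   # history
#     "You are helpful.",   # system_message
#     64, 0.7, 0.95, 0.0,   # max_tokens, temperature, top_p, frequency_penalty
#     -1,                   # seed (-1 = random)
#     "",                   # custom_model (empty = use the default)
# ):
#     print(partial)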
# Create the Gradio Chatbot component
chatbot = gr.Chatbot(height=600)
# Create the "Custom Model" textbox unrendered so it can be passed to the
# ChatInterface as an additional input; ChatInterface renders it under its
# "Additional Inputs" accordion
custom_model = gr.Textbox(
    label="Custom Model",
    placeholder="Hugging Face model path",
    render=False,
)

# Define the Gradio ChatInterface; the message textbox and chat history are
# managed by ChatInterface itself, so only the extra controls are passed as
# additional_inputs (in the same order respond() expects them)
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chatbot,
    additional_inputs=[
        gr.Textbox(label="System Message"),
        gr.Slider(minimum=10, maximum=200, value=200, step=1, label="Max Tokens"),
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0, maximum=1, value=0.95, step=0.05, label="Top P"),
        gr.Slider(minimum=-2, maximum=2, value=0.0, step=0.1, label="Frequency Penalty"),
        gr.Slider(minimum=-1, maximum=1000000, value=-1, step=1, label="Seed (-1 for random)"),
        custom_model,
    ],
    theme="Nymbo/Nymbo_Theme",
)
# Create the "Featured Models" accordion
with gr.Accordion("Featured Models", open=True) as featured_models:
# Textbox for searching models
model_search = gr.Textbox(label="Filter Models")
# List of featured models
models = [
"meta-llama/Llama-3.3-70B-Instruct",
"meta-llama/Llama-2-70B-Chat-hf",
"TheBloke/Llama-2-13B-Chat-GGML",
"TheBloke/Llama-2-70B-Chat-GGML",
"TheBloke/Llama-2-13B-Chat-GGML-v2",
"TheBloke/Llama-2-70B-Chat-GGML-v2",
"TheBloke/Llama-2-70B-Chat-HF-API-compatible-GGML",
"TheBloke/Llama-2-70b-chat-hf",
"TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
"TheBloke/Llama-2-13B-Chat-GGML-v2-32K",
"TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
"TheBloke/Llama-2-13B-Chat-GGML-v2-32K",
"TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
"TheBloke/Llama-7-13B-Chat-GGML-v2-32K",
"TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
"TheBloke/Llama-2-13B-Chat-GGML-v2-32K",
"TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
            # Add more models as needed...
        ]

        # Radio buttons for selecting a model
        model_radio = gr.Radio(choices=models, label="Select a Model")

        # Update the model list based on search input
        def filter_models(search_term):
            filtered_models = [model for model in models if search_term.lower() in model.lower()]
            return gr.update(choices=filtered_models)

        # Update the model list when the search box is used
        model_search.change(filter_models, inputs=model_search, outputs=model_radio)
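        # For example, typing "13b" in the filter box narrows the radio
        # choices to the 13B entries, since the match is a case-insensitive
        # substring test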
# Create a "Custom Model" textbox
custom_model = gr.Textbox(label="Custom Model", placeholder="Hugging Face model path")
# Create the "Information" tab
with gr.Tab("Information"):
# Featured Models accordion
with gr.Accordion("Featured Models", open=False):
gr.Markdown(
"""
# Featured Models
Here's a list of some popular models available on Hugging Face:
- meta-llama/Llama-3.3-70B-Instruct
- meta-llama/Llama-2-70B-Chat-hf
- TheBloke/Llama-2-13B-Chat-GGML
- TheBloke/Llama-2-70B-Chat-GGML
- TheBloke/Llama-2-13B-Chat-GGML-v2
- TheBloke/Llama-2-70B-Chat-GGML-v2
- ... (and many more)
You can search and select a model from the list above, or use your own custom model path.
"""
)
        # Parameters Overview accordion
        with gr.Accordion("Parameters Overview", open=False):
            gr.Markdown(
                """
                # Parameters Overview

                Here's a brief explanation of the parameters you can adjust:

                - **Max Tokens**: The maximum number of tokens to generate in the response.
                - **Temperature**: Controls the randomness of the output. Higher values make the output more random.
                - **Top P**: Nucleus sampling; only the smallest set of tokens whose cumulative probability exceeds P is kept, so lower values make the output more focused.
                - **Frequency Penalty**: Penalizes tokens in proportion to how often they have already appeared, reducing repetition.
                - **Seed**: A fixed seed for reproducibility. Use -1 for a random seed.

                Feel free to experiment with these settings to achieve the desired output.
                """
            )
# Launch the Gradio interface when run as a script
if __name__ == "__main__":
    demo.launch(share=True)
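# Note: share=True requests a temporary public gradio.live link when run
# locally; inside a Hugging Face Space the flag is ignored and the Space's
# own URL is used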