import gradio as gr
from openai import OpenAI
import os

# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")

# Initialize the OpenAI API client against the Hugging Face Inference endpoint
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
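# Note: the base_url above is Hugging Face's OpenAI-compatible serverless
# Inference endpoint, so the standard openai client works unchanged. HF_TOKEN
# must be set (as a Space secret, or in your local environment) or requests
# will fail to authenticate.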
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
):
    # Log the incoming request for debugging
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System Message: {system_message}")
    print(f"Max Tokens: {max_tokens}, Temperature: {temperature}, Top P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")

    # Convert seed to None if -1, so the API picks a random seed
    if seed == -1:
        seed = None

    # Construct the messages list for the API
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_message, assistant_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
            print(f"Added user message: {user_message}")
        if assistant_message:
            messages.append({"role": "assistant", "content": assistant_message})
            print(f"Added assistant message: {assistant_message}")

    # Append the latest user message
    messages.append({"role": "user", "content": message})

    # Accumulate the streamed reply, yielding the partial response as it grows
    response = ""
    for chunk in client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        stream=True,
    ):
        # Streamed chunks carry the token text in .delta, not .message,
        # and the content can be None on some chunks
        token = chunk.choices[0].delta.content
        if token is not None:
            response += token
            yield response
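# Illustrative sketch (not executed): respond() is a generator, and each yield
# is the accumulated reply so far, which is what lets Gradio stream the answer:
#   for partial in respond("Hello!", [], "You are helpful.", 100, 0.7, 0.95, 0.0, -1):
#       print(partial)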
# Create the Gradio Chatbot component
chatbot = gr.Chatbot(height=600)

# Define the Gradio ChatInterface. ChatInterface creates the message textbox
# and manages the conversation history itself, so only the extra parameters
# are passed via additional_inputs, in the order respond() expects them.
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chatbot,
    additional_inputs=[
        gr.Textbox(label="System Message"),
        gr.Slider(minimum=10, maximum=200, step=1, label="Max Tokens"),
        gr.Slider(minimum=0, maximum=2, step=0.1, label="Temperature"),
        gr.Slider(minimum=0, maximum=1, step=0.05, label="Top P"),
        gr.Slider(minimum=-2, maximum=2, step=0.1, label="Frequency Penalty"),
        gr.Slider(minimum=-1, maximum=1000000, step=1, label="Seed (-1 for random)"),
    ],
    theme="Nymbo/Nymbo_Theme",
)
# Add the model-selection controls inside the ChatInterface's Blocks context,
# so they render as part of the same demo
with demo:
    # Create the "Featured Models" accordion
    with gr.Accordion("Featured Models", open=True):
        # Textbox for filtering the model list
        model_search = gr.Textbox(label="Filter Models")
        # List of featured models
        models = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-2-70B-Chat-hf",
            "TheBloke/Llama-2-13B-Chat-GGML",
            "TheBloke/Llama-2-70B-Chat-GGML",
            "TheBloke/Llama-2-13B-Chat-GGML-v2",
            "TheBloke/Llama-2-70B-Chat-GGML-v2",
            "TheBloke/Llama-2-70B-Chat-HF-API-compatible-GGML",
            "TheBloke/Llama-2-70b-chat-hf",
            "TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
            "TheBloke/Llama-2-13B-Chat-GGML-v2-32K",
            # Add more models as needed...
        ]
        # Radio buttons for selecting a model
        model_radio = gr.Radio(choices=models, label="Select a Model")

        # Return only the models whose names contain the search term
        def filter_models(search_term):
            filtered_models = [m for m in models if search_term.lower() in m.lower()]
            return gr.update(choices=filtered_models)

        # Update the model list when the search box is used
        model_search.change(filter_models, inputs=model_search, outputs=model_radio)
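        # For example, typing "GGML" in the filter box narrows the radio
        # choices to TheBloke's GGML conversions in the list above.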
    # Create a "Custom Model" textbox
    custom_model = gr.Textbox(label="Custom Model", placeholder="Hugging Face model path")
    # Create the "Information" tab
    with gr.Tab("Information"):
        # Featured Models accordion
        with gr.Accordion("Featured Models", open=False):
            gr.Markdown(
                """
                # Featured Models

                Here's a list of some popular models available on Hugging Face:

                - meta-llama/Llama-3.3-70B-Instruct
                - meta-llama/Llama-2-70B-Chat-hf
                - TheBloke/Llama-2-13B-Chat-GGML
                - TheBloke/Llama-2-70B-Chat-GGML
                - TheBloke/Llama-2-13B-Chat-GGML-v2
                - TheBloke/Llama-2-70B-Chat-GGML-v2
                - ... (and many more)

                You can search and select a model from the list above, or use your own custom model path.
                """
            )
        # Parameters Overview accordion
        with gr.Accordion("Parameters Overview", open=False):
            gr.Markdown(
                """
                # Parameters Overview

                Here's a brief explanation of the parameters you can adjust:

                - **Max Tokens**: The maximum number of tokens to generate in the response.
                - **Temperature**: Controls the randomness of the output. Higher values make the output more random.
                - **Top P**: Also known as nucleus sampling; sampling is limited to the smallest set of tokens whose cumulative probability exceeds P, cutting off the least likely tokens.
                - **Frequency Penalty**: Penalizes tokens in proportion to how often they have already appeared, reducing repetition.
                - **Seed**: A fixed seed for reproducibility. Use -1 for a random seed.

                Feel free to experiment with these settings to achieve the desired output.
                """
            )
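# Illustrative sketch (not executed) of how the parameters described above map
# onto a raw chat-completion request; the values shown are arbitrary examples:
#   client.chat.completions.create(
#       model="meta-llama/Llama-3.3-70B-Instruct",
#       messages=[{"role": "user", "content": "Hi"}],
#       max_tokens=100,         # Max Tokens
#       temperature=0.7,        # Temperature
#       top_p=0.95,             # Top P
#       frequency_penalty=0.0,  # Frequency Penalty
#       seed=42,                # fixed Seed for reproducible sampling
#   )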
# Launch the Gradio interface
demo.launch(share=True)
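# To run locally (illustrative):
#   export HF_TOKEN=<your Hugging Face access token>
#   python app.py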