import os
import random
import gradio as gr
from groq import Groq
# Groq client; reads the API key from the environment (env var name as defined for this app)
client = Groq(
    api_key=os.environ.get("Groq_Api_Key")
)
def create_history_messages(history):
    # Interleave each (user, assistant) pair so the model sees turns in order.
    history_messages = []
    for user_msg, assistant_msg in history:
        history_messages.append({"role": "user", "content": user_msg})
        history_messages.append({"role": "assistant", "content": assistant_msg})
    return history_messages
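# Illustration only (hypothetical history, not part of the app): a history of
# [["Hi", "Hello!"]] produces
# [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}].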
def generate_response(prompt, history, model, temperature, max_tokens, top_p, seed):
    # Rebuild the conversation so far, then append the new user prompt.
    messages = create_history_messages(history)
    messages.append({"role": "user", "content": prompt})
    print(messages)  # debug: log the full message list sent to the API

    # A seed of 0 means "random": draw a fresh seed for this request.
    if seed == 0:
        seed = random.randint(1, 100000)

    stream = client.chat.completions.create(
        messages=messages,
        model=model,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        seed=seed,
        stop=None,
        stream=True,
    )

    # Accumulate streamed deltas and yield the running text so Gradio
    # updates the chat window incrementally.
    response = ""
    for chunk in stream:
        delta_content = chunk.choices[0].delta.content
        if delta_content is not None:
            response += delta_content
            yield response
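# Sketch of consuming the generator outside Gradio (hypothetical prompt and
# settings; needs a valid API key, so left commented out):
#
#   final = ""
#   for partial in generate_response("Explain top-p sampling.", [], "llama3-70b-8192", 0.5, 1024, 0.5, 42):
#       final = partial  # each value is the response accumulated so far
#   print(final)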
additional_inputs = [
    gr.Dropdown(choices=["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"], value="llama3-70b-8192", label="Model"),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls the diversity of the generated text. Lower is more deterministic, higher is more creative."),
    gr.Slider(minimum=1, maximum=32768, step=1, value=4096, label="Max Tokens", info="The maximum number of tokens the model may generate in a single response.<br>Maximums: 8k for gemma 7b and llama3 8b & 70b, 32k for mixtral 8x7b."),
    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="Nucleus sampling: the model considers only the most probable next tokens whose cumulative probability adds up to p."),
    gr.Number(precision=0, value=42, label="Seed", info="Seeds sampling for reproducible output; use 0 for a random seed."),
]
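# Gradio passes these widgets' values positionally after (prompt, history), so
# the order here must match generate_response(prompt, history, model,
# temperature, max_tokens, top_p, seed).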
# Custom dark-leaning theme; .set() overrides individual theme variables.
theme = gr.themes.Base(
    neutral_hue=gr.themes.Color(c50="#f9fafb", c100="#f3f4f6", c200="#e5e7eb", c300="#d1d5db", c400="#9ca3af", c500="#000000", c600="#3b82f6", c700="#3b82f6", c800="#26252a", c900="#26252a", c950="#000000"),
    spacing_size="sm",
    radius_size="lg",
).set(
    block_background_fill_dark='*background_fill_primary',
    block_border_color='*background_fill_primary',
    block_border_color_dark='*background_fill_primary',
    block_border_width='0px',
    block_border_width_dark='0px',
    block_label_border_color='*background_fill_primary',
    block_label_border_color_dark='*background_fill_primary',
    block_label_border_width='0px',
    block_label_border_width_dark='0px',
)
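# Note on the override mechanism: values like '*background_fill_primary' are
# references to other theme variables, so blocks and labels blend into the
# page background in both light and dark modes.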
# Load the custom top-bar markup that is rendered above the chat UI.
with open('top_bar.html', 'r') as file:
    top_bar_html = file.read()
interface = gr.ChatInterface(
    fn=generate_response,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=False, likeable=False, layout="bubble"),
    additional_inputs=additional_inputs,
    theme=theme,
    submit_btn="↑",
    undo_btn="Delete",
    retry_btn="Retry",
)
# Wrap the chat interface in a Blocks layout so the top bar sits above it.
with gr.Blocks(theme=theme) as demo:
    gr.HTML(top_bar_html)
    interface.render()

demo.launch()
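# To run locally (env var name taken from the client setup above; the script
# filename app.py is assumed):
#   export Groq_Api_Key=...
#   python app.py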