Enderchef committed on
Commit
2f37be3
·
verified ·
1 Parent(s): 86109d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -40
app.py CHANGED
@@ -1,33 +1,22 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
- from spaces import GPU
4
 
5
- """
6
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
7
- """
8
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
9
 
10
-
11
- def respond(
12
- message,
13
- history: list[tuple[str, str]],
14
- system_message,
15
- max_tokens,
16
- temperature,
17
- top_p,
18
- ):
19
  messages = [{"role": "system", "content": system_message}]
20
-
21
- for val in history:
22
- if val[0]:
23
- messages.append({"role": "user", "content": val[0]})
24
- if val[1]:
25
- messages.append({"role": "assistant", "content": val[1]})
26
 
27
  messages.append({"role": "user", "content": message})
28
-
29
  response = ""
30
-
31
  for message in client.chat_completion(
32
  messages,
33
  max_tokens=max_tokens,
@@ -36,30 +25,49 @@ def respond(
36
  top_p=top_p,
37
  ):
38
  token = message.choices[0].delta.content
39
-
40
  response += token
41
  yield response
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- """
45
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
46
- """
47
- demo = gr.ChatInterface(
48
- respond,
49
- additional_inputs=[
50
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
51
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
52
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
53
- gr.Slider(
54
- minimum=0.1,
55
- maximum=1.0,
56
- value=0.95,
57
- step=0.05,
58
- label="Top-p (nucleus sampling)",
59
- ),
60
- ],
61
- )
62
 
 
 
 
 
 
 
63
 
 
 
 
 
 
 
 
64
  if __name__ == "__main__":
65
  demo.launch()
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
 
4
# Client for the hosted (serverless) Hugging Face Inference API.
# NOTE(review): despite the UI copy, InferenceClient calls the remote
# Inference API — it does not run the model on this Space's ZeroGPU
# hardware (that would need `@spaces.GPU` and a locally loaded model);
# confirm which backend is intended.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
6
 
7
# Response function
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion for *message*.

    Args:
        message: Latest user message.
        history: Prior turns as (user_text, assistant_text) pairs; either
            element may be falsy and is then skipped.
        system_message: System prompt prepended to the conversation.
        max_tokens: Cap on generated tokens.
        temperature: Sampling temperature forwarded to the API.
        top_p: Nucleus-sampling threshold forwarded to the API.

    Yields:
        str: The assistant reply accumulated so far (grows with each chunk).
    """
    messages = [{"role": "system", "content": system_message}]

    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})

    response = ""
    # `chunk` (not `message`) so the loop doesn't shadow the parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # The final streamed chunk can carry delta.content=None — without
        # this guard `response += None` raises TypeError mid-stream.
        if token:
            response += token
            yield response
30
 
31
# Gradio interface
with gr.Blocks(css=".gr-box { border-radius: 12px; padding: 16px; }") as demo:
    gr.Markdown(
        """
        # 🤖 Zephyr-7B Chatbot (ZeroGPU Powered)
        Talk to the `HuggingFaceH4/zephyr-7b-beta` model in real-time using ZeroGPU.
        Customize generation settings below.
        """,
        elem_classes=["gr-box"],
    )

    with gr.Row():
        # `height=` replaces `.style(height=...)`, which was removed in
        # Gradio 4 and raises AttributeError there.
        chatbot = gr.Chatbot(
            label="Chat", show_copy_button=True, type="messages", height=400
        )

    with gr.Row(equal_height=True):
        system_msg = gr.Textbox(
            label="🧠 System Prompt",
            value="You are a friendly assistant.",
            lines=2,
            interactive=True,
        )
    with gr.Row():
        max_tokens = gr.Slider(1, 2048, value=512, step=1, label="Max Tokens")
        temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
        top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    with gr.Row():
        # `container=` replaces the removed `.style(container=False)`.
        msg_input = gr.Textbox(
            label="💬 Your message",
            placeholder="Type a message and press Enter...",
            container=False,
        )

    # Bridge between the UI and respond(): the Chatbot holds
    # openai-style {"role", "content"} dicts (type="messages"), while
    # respond() expects (user, assistant) pairs and yields a bare string.
    # Convert on the way in, and yield full message lists on the way out
    # so the streamed text actually renders in the Chatbot.
    def user_submit(user_message, history, system_message, max_tokens, temperature, top_p):
        history = history or []
        # Fold consecutive user/assistant messages into (user, assistant) pairs.
        pairs = []
        for msg in history:
            if msg["role"] == "user":
                pairs.append((msg["content"], None))
            elif msg["role"] == "assistant" and pairs:
                pairs[-1] = (pairs[-1][0], msg["content"])
        shown = history + [{"role": "user", "content": user_message}]
        yield shown  # echo the user's turn immediately
        for partial in respond(user_message, pairs, system_message, max_tokens, temperature, top_p):
            yield shown + [{"role": "assistant", "content": partial}]

    msg_input.submit(
        fn=user_submit,
        inputs=[msg_input, chatbot, system_msg, max_tokens, temperature, top_p],
        outputs=chatbot,
    )
69
+
70
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()