vinuajeesh committed on
Commit 1cc005a · verified · 1 Parent(s): 469cad2

Update app.py

Files changed (1)
  1. app.py  +30 -94
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
-# --- 1. MODEL LOADING ---
+# Download the model from Hugging Face Hub
 print("===== Downloading model... =====")
 model_path = hf_hub_download(
     repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
@@ -10,107 +10,43 @@ model_path = hf_hub_download(
 )
 print(f"Model downloaded to: {model_path}")
 
+# Load the model with llama-cpp-python
 print("===== Loading model... =====")
 llm = Llama(
     model_path=model_path,
-    n_ctx=1096,
-    n_threads=8,
-    n_gpu_layers=0
+    n_ctx=2048,  # adjust as per RAM
+    n_threads=8  # adjust based on your Space CPU (8 is good default)
 )
 print("Model loaded.")
 
+# Chat function with Dolphin 3.0 template
+def chat(user_input):
+    print(f"User input: {user_input}")
+    full_prompt = f"""### System:
+You are Dolphin 3.0, a helpful and friendly AI assistant.
 
-# --- 2. UPGRADED CHAT & STREAMING LOGIC ---
-default_system_prompt = "You are Dolphin 3.0, a helpful and friendly AI assistant."
+### User:
+{user_input}
 
-def chat_stream(user_message, chat_history, system_prompt, temperature, top_p):
-    messages = [{"role": "system", "content": system_prompt}]
-    for human, ai in chat_history:
-        messages.append({"role": "user", "content": human})
-        messages.append({"role": "assistant", "content": ai})
-    messages.append({"role": "user", "content": user_message})
-
-    chat_history.append([user_message, ""])
-
-    stream = llm.create_chat_completion(
-        messages=messages,
-        temperature=temperature,
-        top_p=top_p,
-        max_tokens=1024,
-        stream=True,
-    )
-
-    for chunk in stream:
-        delta = chunk['choices'][0]['delta']
-        if 'content' in delta:
-            chat_history[-1][1] += delta['content']
-            yield chat_history
-
-
-# --- 3. ADVANCED GRADIO UI ---
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky"), css="#chatbot { min-height: 600px; }") as demo:
-    gr.Markdown("## 🐬 Dolphin 3.0 - Upgraded Chat Interface")
-
-    with gr.Row():
-        with gr.Column(scale=4):
-            chatbot = gr.Chatbot(
-                [],
-                elem_id="chatbot",
-                bubble_full_width=False,
-                avatar_images=(("human.png", "dolphin.png")),
-                label="Chat with Dolphin 3.0"
-            )
-            chat_history = gr.State([])
-
-            with gr.Row():
-                message = gr.Textbox(
-                    label="Type your message here...",
-                    placeholder="What's on your mind?",
-                    lines=1,
-                    scale=7,
-                )
-                send_button = gr.Button("Send", variant="primary", scale=1)
-
-        with gr.Column(scale=1):
-            with gr.Accordion("Advanced Settings", open=False):
-                system_prompt = gr.Textbox(value=default_system_prompt, label="System Prompt", lines=3)
-                temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, label="Temperature")
-                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p")
-
-            clear_button = gr.Button("🗑️ Clear Chat")
-            regenerate_button = gr.Button("🔄 Regenerate")
-
-
-    # --- 4. EVENT HANDLERS ---
-    def user_submit(user_message, history, system, temp, top_p):
-        yield gr.update(value=""), history + [[user_message, None]]
-        for updated_history in chat_stream(user_message, history, system, temp, top_p):
-            yield gr.update(value=""), updated_history
-
-    message.submit(user_submit, [message, chat_history, system_prompt, temperature, top_p], [message, chatbot])
-    send_button.click(user_submit, [message, chat_history, system_prompt, temperature, top_p], [message, chatbot])
-
-    def clear_chat():
-        return [], []
-
-    clear_button.click(clear_chat, [], [chatbot, chat_history], queue=False)
+### Assistant:"""
 
-    def regenerate_response(history, system, temp, top_p):
-        if not history:
-            return
-        last_user_message = history[-1][0]
-        reduced_history = history[:-1]
-        for updated_history in chat_stream(last_user_message, reduced_history, system, temp, top_p):
-            yield updated_history
-
-    regenerate_button.click(
-        regenerate_response,
-        [chat_history, system_prompt, temperature, top_p],
-        [chatbot]
+    output = llm(
+        full_prompt,
+        max_tokens=512,
+        stop=["</s>", "### User:", "### Assistant:"]
     )
 
-# --- 5. LAUNCH THE APP (WITH THE BUG FIX) ---
-if __name__ == "__main__":
-    demo.queue()
-    # The show_api=False parameter tells Gradio to not build the API page, avoiding the bug.
-    demo.launch(debug=True, show_api=False)
+    reply = output['choices'][0]['text'].strip()
+    print(f"Model reply: {reply}")
+    return reply
+
+# Gradio UI
+iface = gr.Interface(
+    fn=chat,
+    inputs="text",
+    outputs="text",
+    title="🐬 Dolphin 3.0 - Llama 3.2 3B GGUF Chat",
+    description="Running Dolphin 3.0 Llama 3.2 3B GGUF model using llama-cpp-python on Hugging Face Space"
+)
+
+iface.launch()
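
Not part of this commit, but for context: the simplified gr.Interface above leaves Gradio's default API enabled (the previous version passed show_api=False), so once the Space rebuilds, the new chat() function can also be exercised remotely through gradio_client. A minimal sketch follows; the file name and the Space id are placeholders, not values from the commit.

# sanity_check.py — hypothetical client-side test, not included in this commit
from gradio_client import Client

client = Client("vinuajeesh/dolphin-3.0-chat")   # placeholder Space id (assumption)
reply = client.predict(
    "Give me one fun fact about dolphins.",      # maps to the single "text" input of gr.Interface
    api_name="/predict",                         # default endpoint name for a gr.Interface
)
print(reply)

Because chat() returns the whole completion at once rather than streaming, predict() blocks until generation finishes and returns the same string the textbox UI displays.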