vinuajeesh committed · Commit 633a2af · verified · 1 Parent(s): 1cc005a

Update app.py

Files changed (1):
  1. app.py +55 -23
app.py CHANGED
@@ -1,8 +1,9 @@
 import gradio as gr
 from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
+import os

-# Download the model from Hugging Face Hub
+# --- 1. Model Downloading (No changes) ---
 print("===== Downloading model... =====")
 model_path = hf_hub_download(
     repo_id="bartowski/Dolphin3.0-Llama3.2-3B-GGUF",
@@ -10,43 +11,74 @@ model_path = hf_hub_download(
 )
 print(f"Model downloaded to: {model_path}")

-# Load the model with llama-cpp-python
+# --- 2. Model Loading (Optimized for HF Space CPU) ---
 print("===== Loading model... =====")
+
+# This will correctly get the number of CPUs allocated to your Space (often 2 on free tier)
+n_threads = os.cpu_count()
+
 llm = Llama(
     model_path=model_path,
-    n_ctx=2048, # adjust as per RAM
-    n_threads=8 # adjust based on your Space CPU (8 is good default)
+    n_ctx=2048,
+    n_threads=n_threads,
+    # --- !! KEY CHANGE FOR HUGGING FACE SPACES !! ---
+    # Set n_gpu_layers to 0. Free Spaces run on CPUs.
+    # Setting this to a non-zero value will cause errors without a GPU upgrade.
+    n_gpu_layers=0
 )
-print("Model loaded.")
+print(f"Model loaded for CPU execution with {n_threads} threads.")
+
+
+# --- 3. Chat Function with Streaming (Still the best!) ---
+def chat(message, history):
+    # This function remains the same, as streaming is even MORE important on a slower CPU!
+    history_prompt = ""
+    for user_msg, assistant_msg in history:
+        history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"

-# Chat function with Dolphin 3.0 template
-def chat(user_input):
-    print(f"User input: {user_input}")
     full_prompt = f"""### System:
 You are Dolphin 3.0, a helpful and friendly AI assistant.

-### User:
-{user_input}
+{history_prompt}### User:
+{message}

 ### Assistant:"""

-    output = llm(
+    stream = llm(
         full_prompt,
-        max_tokens=512,
-        stop=["</s>", "### User:", "### Assistant:"]
+        max_tokens=1024,
+        stop=["</s>", "### User:", "### Assistant:"],
+        stream=True
     )

-    reply = output['choices'][0]['text'].strip()
-    print(f"Model reply: {reply}")
-    return reply
+    partial_message = ""
+    for output in stream:
+        token = output['choices'][0]['text']
+        partial_message += token
+        yield partial_message

-# Gradio UI
-iface = gr.Interface(
+# --- 4. The Enhanced Chatbot UI (Perfect for Spaces!) ---
+iface = gr.ChatInterface(
     fn=chat,
-    inputs="text",
-    outputs="text",
-    title="🐬 Dolphin 3.0 - Llama 3.2 3B GGUF Chat",
-    description="Running Dolphin 3.0 Llama 3.2 3B GGUF model using llama-cpp-python on Hugging Face Space"
+    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
+    description="A sleek, streaming chat interface running on a CPU Space.",
+    chatbot=gr.Chatbot(height=500),
+    textbox=gr.Textbox(placeholder="Ask me something... I'm all yours.", container=False, scale=7),
+    theme="soft",
+    examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
+    cache_examples=False,
+    retry_btn=None,
+    undo_btn="Delete Previous",
+    clear_btn="Clear Chat",
 )

-iface.launch()
+# --- Pro-Tip: Create a requirements.txt file for your Space! ---
+# Your Space needs to know what libraries to install. Create a file
+# named `requirements.txt` in your repository with the following lines:
+#
+# gradio
+# llama-cpp-python
+# huggingface_hub
+
+if __name__ == "__main__":
+    iface.launch()
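
A note on the new thread logic: `os.cpu_count()` reports the host machine's logical CPUs, which inside a containerized Space can be higher than the cores the Space is actually allocated. A hedged alternative sketch, using the Linux-only `os.sched_getaffinity` (the `allocated_threads` helper and its fallback are illustrative, not part of this commit):

```python
import os

# Sketch, not part of this commit: derive a thread count from the CPU set the
# process is actually allowed to run on, falling back to os.cpu_count().
def allocated_threads() -> int:  # hypothetical helper
    try:
        # Linux-only: the CPUs this process may be scheduled on, which tends to
        # respect container CPU limits better than os.cpu_count().
        return len(os.sched_getaffinity(0))
    except AttributeError:
        return os.cpu_count() or 1

n_threads = allocated_threads()
```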
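To make the prompt construction in `chat()` concrete, here is what `full_prompt` expands to for one prior turn. The history pair and message are made-up values; the template strings are copied from the diff above:

```python
# (user, assistant) pairs, as gr.ChatInterface passes them to chat()
history = [("Hello!", "Hi there! How can I help?")]
message = "What is the capital of India?"

history_prompt = ""
for user_msg, assistant_msg in history:
    history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"

full_prompt = f"""### System:
You are Dolphin 3.0, a helpful and friendly AI assistant.

{history_prompt}### User:
{message}

### Assistant:"""
print(full_prompt)
```

The stop strings `"### User:"` and `"### Assistant:"` in the `llm()` call then keep the model from generating the next turn of the conversation itself.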
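One hedged follow-up to the requirements.txt pro-tip: the `retry_btn`, `undo_btn`, and `clear_btn` keyword arguments, and the tuple-style `history` this code iterates over, belong to the Gradio 4.x `gr.ChatInterface` API and were removed in Gradio 5, so an unpinned `gradio` line may break this Space on a fresh build. Pinning along these lines (the `<5` constraint is a suggestion, not part of the commit) would be safer:

```
gradio<5
llama-cpp-python
huggingface_hub
```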
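Because `iface.launch()` now sits behind an `if __name__ == "__main__":` guard, `app.py` can also be imported and the generator exercised without starting the Gradio server. A minimal smoke-test sketch (importing will still download and load the model, which takes a while on first run):

```python
from app import chat

# chat() is a generator that yields the accumulated reply after each token.
reply = ""
for partial in chat("Say hello in one sentence.", history=[]):
    reply = partial
print(reply)
```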