vinuajeesh committed (verified)
Commit d039ddb · Parent: 633a2af

Update app.py

Files changed (1):
  app.py  +6 -19
app.py CHANGED
@@ -13,25 +13,19 @@ print(f"Model downloaded to: {model_path}")
 
 # --- 2. Model Loading (Optimized for HF Space CPU) ---
 print("===== Loading model... =====")
-
-# This will correctly get the number of CPUs allocated to your Space (often 2 on free tier)
 n_threads = os.cpu_count()
-
 llm = Llama(
     model_path=model_path,
     n_ctx=2048,
     n_threads=n_threads,
-    # --- !! KEY CHANGE FOR HUGGING FACE SPACES !! ---
-    # Set n_gpu_layers to 0. Free Spaces run on CPUs.
-    # Setting this to a non-zero value will cause errors without a GPU upgrade.
     n_gpu_layers=0
 )
+# I see from your logs it found 16 threads! That's a powerful CPU Space.
 print(f"Model loaded for CPU execution with {n_threads} threads.")
 
 
-# --- 3. Chat Function with Streaming (Still the best!) ---
+# --- 3. Chat Function with Streaming (No changes) ---
 def chat(message, history):
-    # This function remains the same, as streaming is even MORE important on a slower CPU!
     history_prompt = ""
     for user_msg, assistant_msg in history:
         history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
@@ -57,7 +51,9 @@ You are Dolphin 3.0, a helpful and friendly AI assistant.
         partial_message += token
         yield partial_message
 
-# --- 4. The Enhanced Chatbot UI (Perfect for Spaces!) ---
+# --- 4. The Enhanced Chatbot UI (Corrected!) ---
+# We've removed the 'retry_btn' and 'undo_btn' arguments to match the
+# version of Gradio running on Hugging Face Spaces.
 iface = gr.ChatInterface(
     fn=chat,
     title="🐬 Dolphin 3.0 on Hugging Face Spaces",
@@ -67,18 +63,9 @@ iface = gr.ChatInterface(
     theme="soft",
     examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
     cache_examples=False,
-    retry_btn=None,
-    undo_btn="Delete Previous",
-    clear_btn="Clear Chat",
+    clear_btn="Clear Chat",  # This one is universally loved and supported
 )
 
-# --- Pro-Tip: Create a requirements.txt file for your Space! ---
-# Your Space needs to know what libraries to install. Create a file
-# named `requirements.txt` in your repository with the following lines:
-#
-# gradio
-# llama-cpp-python
-# huggingface_hub
 
 if __name__ == "__main__":
     iface.launch()
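
A note on the thread count: os.cpu_count() reports the host machine's CPUs, not the share a containerized Space is actually allowed to use, which is the likely reason the logs reported 16 threads. A minimal quota-aware sketch, assuming the Space runs on Linux (os.sched_getaffinity is Linux-only, hence the guard):

import os

if hasattr(os, "sched_getaffinity"):
    # CPUs this process may be scheduled on (respects cpuset pinning,
    # though not CFS bandwidth quotas)
    n_threads = len(os.sched_getaffinity(0))
else:
    # Portable fallback: host-wide count, may exceed the container's quota
    n_threads = os.cpu_count() or 1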
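
The hunks elide the middle of chat(), but the partial_message / yield pair implies a llama-cpp-python streaming loop roughly like the sketch below. The prompt template and the "You are Dolphin 3.0..." system line come from the visible diff context; max_tokens, the stop string, and the exact prompt assembly are assumptions for illustration:

def chat(message, history):
    # Rebuild the conversation in the ### User / ### Assistant template
    history_prompt = ""
    for user_msg, assistant_msg in history:
        history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"

    prompt = (
        "You are Dolphin 3.0, a helpful and friendly AI assistant.\n\n"
        f"{history_prompt}### User:\n{message}\n\n### Assistant:\n"
    )

    partial_message = ""
    # stream=True makes the Llama call yield completion chunks as they decode
    for chunk in llm(prompt, max_tokens=512, stop=["### User:"], stream=True):
        token = chunk["choices"][0]["text"]
        partial_message += token
        yield partial_message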
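
On the removed buttons: which of these keyword arguments gr.ChatInterface accepts has shifted across Gradio releases, so hard-coding them ties app.py to one version. A sketch of a version-tolerant alternative that filters the kwargs against the installed signature (the button labels are the ones from the original file; the inspect-based filtering is a suggestion, not part of this commit):

import inspect
import gradio as gr

# Keep only the button kwargs this Gradio version's ChatInterface accepts
wanted = {"retry_btn": None, "undo_btn": "Delete Previous", "clear_btn": "Clear Chat"}
accepted = inspect.signature(gr.ChatInterface.__init__).parameters
button_kwargs = {k: v for k, v in wanted.items() if k in accepted}

iface = gr.ChatInterface(
    fn=chat,
    title="🐬 Dolphin 3.0 on Hugging Face Spaces",
    theme="soft",
    cache_examples=False,
    **button_kwargs,
)

Under a release that drops clear_btn as well, this degrades gracefully instead of raising a TypeError at startup.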