Spaces:

vinuajeesh
/

mybot

Runtime error

App Files Files Community

vinuajeesh commited on about 1 month ago

Commit

d039ddb

verified ·

1 Parent(s): 633a2af

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -19

app.py CHANGED Viewed

@@ -13,25 +13,19 @@ print(f"Model downloaded to: {model_path}")
 # --- 2. Model Loading (Optimized for HF Space CPU) ---
 print("===== Loading model... =====")
-# This will correctly get the number of CPUs allocated to your Space (often 2 on free tier)
 n_threads = os.cpu_count()
 llm = Llama(
     model_path=model_path,
     n_ctx=2048,
     n_threads=n_threads,
-    # --- !! KEY CHANGE FOR HUGGING FACE SPACES !! ---
-    # Set n_gpu_layers to 0. Free Spaces run on CPUs.
-    # Setting this to a non-zero value will cause errors without a GPU upgrade.
     n_gpu_layers=0
 )
 print(f"Model loaded for CPU execution with {n_threads} threads.")
-# --- 3. Chat Function with Streaming (Still the best!) ---
 def chat(message, history):
-    # This function remains the same, as streaming is even MORE important on a slower CPU!
     history_prompt = ""
     for user_msg, assistant_msg in history:
         history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
@@ -57,7 +51,9 @@ You are Dolphin 3.0, a helpful and friendly AI assistant.
         partial_message += token
         yield partial_message
-# --- 4. The Enhanced Chatbot UI (Perfect for Spaces!) ---
 iface = gr.ChatInterface(
     fn=chat,
     title="🐬 Dolphin 3.0 on Hugging Face Spaces",
@@ -67,18 +63,9 @@ iface = gr.ChatInterface(
     theme="soft",
     examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
     cache_examples=False,
-    retry_btn=None,
-    undo_btn="Delete Previous",
-    clear_btn="Clear Chat",
 )
-# --- Pro-Tip: Create a requirements.txt file for your Space! ---
-# Your Space needs to know what libraries to install. Create a file
-# named `requirements.txt` in your repository with the following lines:
-#
-# gradio
-# llama-cpp-python
-# huggingface_hub
 if __name__ == "__main__":
     iface.launch()

 # --- 2. Model Loading (Optimized for HF Space CPU) ---
 print("===== Loading model... =====")
 n_threads = os.cpu_count()
 llm = Llama(
     model_path=model_path,
     n_ctx=2048,
     n_threads=n_threads,
     n_gpu_layers=0
 )
+# I see from your logs it found 16 threads! That's a powerful CPU Space.
 print(f"Model loaded for CPU execution with {n_threads} threads.")
+# --- 3. Chat Function with Streaming (No changes) ---
 def chat(message, history):
     history_prompt = ""
     for user_msg, assistant_msg in history:
         history_prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
         partial_message += token
         yield partial_message
+# --- 4. The Enhanced Chatbot UI (Corrected!) ---
+# We've removed the 'retry_btn' and 'undo_btn' arguments to match the
+# version of Gradio running on Hugging Face Spaces.
 iface = gr.ChatInterface(
     fn=chat,
     title="🐬 Dolphin 3.0 on Hugging Face Spaces",
     theme="soft",
     examples=[["Hello!"], ["Write a short poem about the stars."], ["What is the capital of India?"]],
     cache_examples=False,
+    clear_btn="Clear Chat", # This one is universally loved and supported
 )
 if __name__ == "__main__":
     iface.launch()