frimelle (HF Staff) committed
Commit: 91b2732
Parent(s): 865324e

add zerogpu setup

Files changed (2):
  1. app.py +20 -16
  2. requirements.txt +3 -2
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import uuid
 import os
 from datetime import datetime
+import spaces  # required for ZeroGPU
 
 # ----- Constants -----
 MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
@@ -12,27 +13,31 @@ with open("system_prompt.txt", "r") as f:
 LOG_DIR = "chat_logs"
 os.makedirs(LOG_DIR, exist_ok=True)
 
-# ----- Load model and tokenizer -----
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_NAME,
-    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
-    device_map="auto" if device == "cuda" else None
-)
-model.eval()
-
-# ----- Log setup -----
+# Global vars to hold model and tokenizer
+model = None
+tokenizer = None
 session_id = str(uuid.uuid4())
 
+# ----- Log Chat -----
 def log_chat(session_id, user_msg, bot_msg):
     timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     with open(os.path.join(LOG_DIR, f"{session_id}.txt"), "a") as f:
         f.write(f"[{timestamp}] User: {user_msg}\n")
         f.write(f"[{timestamp}] Bot: {bot_msg}\n\n")
 
-# ----- Inference -----
+# ----- Required by ZeroGPU -----
+@spaces.GPU
+def load_model():
+    global model, tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto"
+    )
+    model.eval()
+
+# ----- Inference Function -----
 def format_chat_prompt(history, new_input):
     messages = [{"role": "system", "content": SYSTEM_PROMPT}]
     for user_msg, bot_msg in history:
@@ -54,14 +59,13 @@ def respond(message, history):
         pad_token_id=tokenizer.eos_token_id
     )
     decoded = tokenizer.decode(output[0], skip_special_tokens=True)
-    # Extract the assistant's final message
     response = decoded.split(message)[-1].strip().split("\n")[0].strip()
     log_chat(session_id, message, response)
     return response
 
-# ----- Gradio Chat Interface -----
+# ----- Gradio App -----
 gr.ChatInterface(
     fn=respond,
     title="BoundrAI",
-    theme="soft",  # optional aesthetic
+    theme="soft"
 ).launch()
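
Note on the hunks above: model and tokenizer now start as None and only the @spaces.GPU-decorated load_model() fills them, but the diff never shows where that loader is invoked. A minimal sketch of the lazy-load guard this pattern usually implies follows; the ensure_loaded helper and the call site inside respond() are assumptions for illustration, not part of the commit.

def ensure_loaded():
    # Assumed helper: run the @spaces.GPU-decorated loader from the diff
    # once, on the first request that actually needs the model.
    if model is None or tokenizer is None:
        load_model()

def respond(message, history):
    ensure_loaded()  # hypothetical wiring; the commit does not show this call
    prompt = format_chat_prompt(history, message)
    # ... generation continues as in the unchanged middle of app.py ...

On ZeroGPU, @spaces.GPU requests GPU hardware only while the decorated function runs, so deferring the heavy from_pretrained() call like this keeps the Space's startup fast instead of loading a 7B model at import time.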
requirements.txt CHANGED
@@ -1,4 +1,5 @@
 huggingface_hub==0.25.2
-transformers
 gradio
-torch
+transformers
+torch
+spaces
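
The new spaces dependency installs as an ordinary pip package, and its GPU decorator is documented to have no effect outside ZeroGPU Spaces. For running app.py somewhere the package is not installed at all, a small import guard keeps the module importable; this is a sketch with a hypothetical stand-in class, not something the commit adds.

try:
    import spaces
except ImportError:
    # Hypothetical fallback for environments without the `spaces` wheel:
    # mimic only the bare-decorator form used in app.py (@spaces.GPU).
    class spaces:
        @staticmethod
        def GPU(fn):
            return fn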