frimelle HF Staff committed on
Commit 865324e · 1 Parent(s): abbd661

change model, log messages

Files changed (1)
  1. app.py +64 -30
app.py CHANGED
@@ -1,33 +1,67 @@
 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import uuid
+import os
+from datetime import datetime
 
-# You can replace this with a file read, environment variable, or UI input
+# ----- Constants -----
+MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
 with open("system_prompt.txt", "r") as f:
-    system_prompt = f.read()
-
-def respond(message, chat_history):
-    if chat_history is None:
-        chat_history = []
-
-    # Combine system prompt and previous messages
-    full_prompt = system_prompt + "\n"
-    for user_msg, bot_msg in chat_history:
-        full_prompt += f"User: {user_msg}\nAssistant: {bot_msg}\n"
-    full_prompt += f"User: {message}\nAssistant:"
-
-    # Use a simple model (or call an API)
-    # Example using Hugging Face transformers (e.g., tiny model for demo):
-    from transformers import pipeline
-    generator = pipeline("text-generation", model="mistralai/Mistral-7B-Instruct-v0.1")
-    response = generator(full_prompt, max_new_tokens=100)[0]['generated_text'].split("Assistant:")[-1].strip()
-
-    chat_history.append((message, response))
-    return "", chat_history
-
-chatbot = gr.Chatbot()
-interface = gr.Interface(fn=respond,
-                         inputs=["text", "state"],
-                         outputs=["text", "state"],
-                         live=False,
-                         title="Custom Prompt Chatbot")
-
-interface.launch()
+    SYSTEM_PROMPT = f.read()
+LOG_DIR = "chat_logs"
+os.makedirs(LOG_DIR, exist_ok=True)
+
+# ----- Load model and tokenizer -----
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_NAME,
+    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+    device_map="auto" if device == "cuda" else None
+)
+model.eval()
+
+# ----- Log setup -----
+session_id = str(uuid.uuid4())
+
+def log_chat(session_id, user_msg, bot_msg):
+    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    with open(os.path.join(LOG_DIR, f"{session_id}.txt"), "a") as f:
+        f.write(f"[{timestamp}] User: {user_msg}\n")
+        f.write(f"[{timestamp}] Bot: {bot_msg}\n\n")
+
+# ----- Inference -----
+def format_chat_prompt(history, new_input):
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+    for user_msg, bot_msg in history:
+        messages.append({"role": "user", "content": user_msg})
+        messages.append({"role": "assistant", "content": bot_msg})
+    messages.append({"role": "user", "content": new_input})
+    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+@torch.no_grad()
+def respond(message, history):
+    prompt = format_chat_prompt(history, message)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    output = model.generate(
+        **inputs,
+        max_new_tokens=512,
+        do_sample=True,
+        temperature=0.7,
+        top_p=0.95,
+        pad_token_id=tokenizer.eos_token_id
+    )
+    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
+    # Extract the assistant's final message
+    response = decoded.split(message)[-1].strip().split("\n")[0].strip()
+    log_chat(session_id, message, response)
+    return response
+
+# ----- Gradio Chat Interface -----
+gr.ChatInterface(
+    fn=respond,
+    title="BoundrAI",
+    theme="soft",  # optional aesthetic
+).launch()
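
Note on the new respond(): recovering the reply with decoded.split(message)[-1] and then keeping only the first line is fragile; the split can land in the wrong place if the user text recurs in the prompt or the model output, and multi-line answers get truncated. A minimal alternative sketch, assuming the tokenizer, model, and format_chat_prompt defined in app.py above (the name respond_sliced is hypothetical, not part of the commit): decode only the tokens produced after the prompt.

import torch

@torch.no_grad()
def respond_sliced(message, history):
    prompt = format_chat_prompt(history, message)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Slice off the prompt tokens and decode only the newly generated ones,
    # so the reply does not depend on finding the raw user text in the output.
    new_tokens = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

Passed as fn to gr.ChatInterface, this would leave the rest of the app unchanged, and log_chat could be called on the returned string in the same way.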