Update app.py
app.py
CHANGED
@@ -15,26 +15,18 @@ print(f"Using device: {device}")
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.float16,
-    device_map="auto",
+    torch_dtype=torch.float16,
+    device_map="auto",
     trust_remote_code=True
 )
 print("Model and processor loaded successfully.")
 
-# --- Define and apply
-chat_template = """{% for message in messages
-
-
-
-
-{{ message['role'].capitalize() + ': ' + message['content'] }}
-{%- if not loop.last -%}
-{{ ' ' }}
-{%- endif %}
-{%- endfor -%}
-{%- if add_generation_prompt -%}
-{{ ' Assistant:' }}
-{%- endif %}"""
+# --- Define and apply a more flexible chat template ---
+chat_template = """{% for message in messages %}
+{{ message['role'].capitalize() }}: {{ message['content'] }}
+{% if not loop.last %}{{ '\n' }}{% endif %}
+{% endfor %}
+{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"""
 processor.tokenizer.chat_template = chat_template
 
 # --- 2. Gradio Chatbot Logic ---
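The new template can be previewed without loading the model by rendering it with plain Jinja2. This is only a sketch: the messages below are made-up example data, and transformers applies chat templates with slightly different whitespace handling (it trims block lines), so spacing may differ from the tokenizer's actual output.

from jinja2 import Template

chat_template = """{% for message in messages %}
{{ message['role'].capitalize() }}: {{ message['content'] }}
{% if not loop.last %}{{ '\n' }}{% endif %}
{% endfor %}
{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"""

# Hypothetical conversation, for illustration only
messages = [
    {"role": "user", "content": "Describe this image."},
    {"role": "assistant", "content": "It shows a red bicycle leaning against a wall."},
    {"role": "user", "content": "What color is the wall?"},
]

# Prints something like "User: ... Assistant: ... User: ... Assistant:" across several lines
print(Template(chat_template).render(messages=messages, add_generation_prompt=True))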
@@ -44,12 +36,15 @@ def generate_response(user_message, messages_list, image_pil, max_new_tokens, to
     Generate a response from the model using streaming.
     """
     try:
-        #
-        messages_list.
+        # Create a copy of the messages list to avoid modifying the original
+        current_messages = messages_list.copy()
+        current_messages.append({"role": "user", "content": user_message})
+
+        print(current_messages)
 
         # Use the processor to apply the chat template
         prompt = processor.tokenizer.apply_chat_template(
-
+            current_messages,
             tokenize=False,
             add_generation_prompt=True
         )
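A note on the .copy() above: the handler appends the new user turn to a shallow copy, so the caller's conversation history is not mutated as a side effect. A minimal illustration:

# Appending to a shallow copy leaves the original list untouched
history = [{"role": "user", "content": "Hi"}]
current_messages = history.copy()
current_messages.append({"role": "user", "content": "What is in this image?"})
assert len(history) == 1
assert len(current_messages) == 2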
@@ -85,8 +80,10 @@ def generate_response(user_message, messages_list, image_pil, max_new_tokens, to
         thread.start()
 
         # Yield the generated tokens as they become available
+        response = ""
         for new_token in streamer:
-
+            response += new_token
+            yield response
 
     except Exception as e:
         print(f"Error during inference: {e}")
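This change completes the streaming loop: generate() runs in a background thread, and the handler accumulates tokens from the streamer and yields the text so far. The same pattern in isolation, as a sketch (the model, tokenizer, and tokenized inputs are assumed to already exist; max_new_tokens is illustrative, not the app's value):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, inputs, max_new_tokens=256):
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    # generate() blocks until it finishes, so it runs in a background thread
    # while this generator consumes decoded tokens as they arrive
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    response = ""
    for new_token in streamer:
        response += new_token
        yield response  # yield the full text so far, matching the app's behaviour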
@@ -103,12 +100,10 @@ def process_chat(user_message, chatbot_display, messages_list, image_pil, max_ne
     # Append user's message to the chatbot display list
     chatbot_display.append((user_message, ""))
 
-    # Initialize the response as an empty string
-    response = ""
-
     # Generate the response using streaming
+    response = ""
    for chunk in generate_response(user_message, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
-        response
+        response = chunk
         # Update the chatbot display with the current response
         chatbot_display[-1] = (user_message, response)
         yield chatbot_display, messages_list, ""
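The loop above assigns response = chunk rather than concatenating because generate_response already yields the accumulated reply; each chunk supersedes the previous one, and += would repeat earlier text. A small standalone illustration:

def producer():
    text = ""
    for piece in ["Hel", "lo", " world"]:
        text += piece
        yield text  # cumulative snapshots: "Hel", "Hello", "Hello world"

latest = ""
for chunk in producer():
    latest = chunk  # keep only the newest snapshot
print(latest)  # -> "Hello world"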
@@ -184,4 +179,4 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(mcp_server=True)
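The new launch flag also exposes the app over MCP: with a recent Gradio (5.x with the mcp extra installed), mcp_server=True serves the usual web UI plus an MCP endpoint whose tools mirror the app's API functions. A minimal standalone sketch using a hypothetical function, not taken from this app:

import gradio as gr

def letter_counter(word: str, letter: str) -> int:
    """Count how many times letter appears in word."""
    return word.lower().count(letter.lower())

demo = gr.Interface(fn=letter_counter, inputs=["text", "text"], outputs="number")

if __name__ == "__main__":
    # Serves the web UI and, additionally, an MCP server that agents can call
    demo.launch(mcp_server=True)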