KingNish committed
Commit 98aa9c9 · verified · 1 Parent(s): 689b8e4

Update app.py

Files changed (1)
  1. app.py +20 -25
app.py CHANGED
@@ -15,26 +15,18 @@ print(f"Using device: {device}")
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.float16, # Use float16 for less memory usage on GPU
-    device_map="auto", # Automatically uses available GPUs
+    torch_dtype=torch.float16,
+    device_map="auto",
     trust_remote_code=True
 )
 print("Model and processor loaded successfully.")
 
-# --- Define and apply the chat template ---
-chat_template = """{% for message in messages -%}
-    {%- if (loop.index % 2 == 1 and message['role'] != 'user') or
-       (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}
-        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
-    {%- endif -%}
-    {{ message['role'].capitalize() + ': ' + message['content'] }}
-    {%- if not loop.last -%}
-        {{ ' ' }}
-    {%- endif %}
-{%- endfor -%}
-{%- if add_generation_prompt -%}
-    {{ ' Assistant:' }}
-{%- endif %}"""
+# --- Define and apply a more flexible chat template ---
+chat_template = """{% for message in messages %}
+{{ message['role'].capitalize() }}: {{ message['content'] }}
+{% if not loop.last %}{{ '\n' }}{% endif %}
+{% endfor %}
+{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"""
 processor.tokenizer.chat_template = chat_template
 
 # --- 2. Gradio Chatbot Logic ---
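For context on the template change above: the new Jinja template drops the strict role-alternation check and simply prints each turn as "Role: content". The sketch below is not part of the commit and its example messages are made up; it renders the same template with a plain Jinja2 environment configured the way transformers configures chat-template rendering (trim_blocks and lstrip_blocks enabled), so the output should roughly match what processor.tokenizer.apply_chat_template(..., tokenize=False) produces.

# Standalone sketch, assuming transformers renders chat templates in a
# Jinja2 environment with trim_blocks=True and lstrip_blocks=True.
from jinja2 import Environment

chat_template = """{% for message in messages %}
{{ message['role'].capitalize() }}: {{ message['content'] }}
{% if not loop.last %}{{ '\n' }}{% endif %}
{% endfor %}
{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"""

env = Environment(trim_blocks=True, lstrip_blocks=True)
template = env.from_string(chat_template)

# Hypothetical conversation, only to show the rendered prompt shape.
messages = [
    {"role": "user", "content": "Describe the image."},
    {"role": "assistant", "content": "It shows a red bicycle."},
    {"role": "user", "content": "What colour is the frame?"},
]
print(template.render(messages=messages, add_generation_prompt=True))
# Roughly:
# User: Describe the image.
#
# Assistant: It shows a red bicycle.
#
# User: What colour is the frame?
# Assistant: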
@@ -44,12 +36,15 @@ def generate_response(user_message, messages_list, image_pil, max_new_tokens, to
     Generate a response from the model using streaming.
     """
     try:
-        # Append user's message to the conversation history for the model
-        messages_list.append({"role": "user", "content": user_message})
+        # Create a copy of the messages list to avoid modifying the original
+        current_messages = messages_list.copy()
+        current_messages.append({"role": "user", "content": user_message})
+
+        print(current_messages)
 
         # Use the processor to apply the chat template
         prompt = processor.tokenizer.apply_chat_template(
-            messages_list,
+            current_messages,
             tokenize=False,
             add_generation_prompt=True
         )
@@ -85,8 +80,10 @@ def generate_response(user_message, messages_list, image_pil, max_new_tokens, to
         thread.start()
 
         # Yield the generated tokens as they become available
+        response = ""
         for new_token in streamer:
-            yield new_token
+            response += new_token
+            yield response
 
     except Exception as e:
         print(f"Error during inference: {e}")
@@ -103,12 +100,10 @@ def process_chat(user_message, chatbot_display, messages_list, image_pil, max_ne
     # Append user's message to the chatbot display list
     chatbot_display.append((user_message, ""))
 
-    # Initialize the response as an empty string
-    response = ""
-
     # Generate the response using streaming
+    response = ""
     for chunk in generate_response(user_message, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
-        response += chunk
+        response = chunk
         # Update the chatbot display with the current response
         chatbot_display[-1] = (user_message, response)
         yield chatbot_display, messages_list, ""
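A note on the two streaming hunks above: generate_response now yields the accumulated response text rather than individual token deltas, so process_chat assigns response = chunk instead of concatenating. The toy generator below is hypothetical, not from app.py, and only illustrates the convention; mixing the two styles would duplicate text in the chat display.

# Minimal illustration of cumulative streaming (assumed example, not app code).
def stream_cumulative(tokens):
    response = ""
    for tok in tokens:
        response += tok
        yield response          # yields "He", "Hell", "Hello", ...

response = ""
for chunk in stream_cumulative(["He", "ll", "o"]):
    response = chunk            # assign, don't concatenate: chunk is the full text so far
print(response)                 # -> Hello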
@@ -184,4 +179,4 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
     )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(mcp_server=True)
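On the last hunk: mcp_server=True asks Gradio to also expose the app's API endpoints over the Model Context Protocol. To the best of my knowledge this needs a recent Gradio 5.x with the MCP extra installed (pip install "gradio[mcp]"); older versions do not accept the keyword, so a guarded launch like the hedged sketch below keeps the Space usable either way.

# Hedged sketch (not part of the commit): fall back gracefully if the installed
# Gradio predates the mcp_server launch flag.
import inspect
import gradio as gr

launch_kwargs = {}
if "mcp_server" in inspect.signature(gr.Blocks.launch).parameters:
    launch_kwargs["mcp_server"] = True  # expose the app's endpoints as MCP tools

# demo.launch(**launch_kwargs)  # `demo` is the gr.Blocks app built in app.py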
 