Update app.py
app.py
CHANGED
@@ -15,26 +15,18 @@ print(f"Using device: {device}")
 processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.float16,
-    device_map="auto",
+    torch_dtype=torch.float16,
+    device_map="auto",
     trust_remote_code=True
 )
 print("Model and processor loaded successfully.")
 
-# --- Define and apply
-chat_template = """{% for message in messages
-
-
-
-
-{{ message['role'].capitalize() + ': ' + message['content'] }}
-{%- if not loop.last -%}
-{{ ' ' }}
-{%- endif %}
-{%- endfor -%}
-{%- if add_generation_prompt -%}
-{{ ' Assistant:' }}
-{%- endif %}"""
+# --- Define and apply a more flexible chat template ---
+chat_template = """{% for message in messages %}
+{{ message['role'].capitalize() }}: {{ message['content'] }}
+{% if not loop.last %}{{ '\n' }}{% endif %}
+{% endfor %}
+{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"""
 processor.tokenizer.chat_template = chat_template
 
 # --- 2. Gradio Chatbot Logic ---
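The new template can be previewed without loading the model by rendering it with plain Jinja2. This is only a sketch: the messages below are made-up example data, and transformers applies chat templates with slightly different whitespace handling (it trims block lines), so spacing may differ from the tokenizer's actual output.

from jinja2 import Template

chat_template = """{% for message in messages %}
{{ message['role'].capitalize() }}: {{ message['content'] }}
{% if not loop.last %}{{ '\n' }}{% endif %}
{% endfor %}
{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}"""

# Hypothetical conversation, for illustration only
messages = [
    {"role": "user", "content": "Describe this image."},
    {"role": "assistant", "content": "It shows a red bicycle leaning against a wall."},
    {"role": "user", "content": "What color is the wall?"},
]

# Prints something like "User: ... Assistant: ... User: ... Assistant:" across several lines
print(Template(chat_template).render(messages=messages, add_generation_prompt=True))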
@@ -44,12 +36,15 @@ def generate_response(user_message, messages_list, image_pil, max_new_tokens, to
     Generate a response from the model using streaming.
     """
     try:
-        #
-        messages_list.
+        # Create a copy of the messages list to avoid modifying the original
+        current_messages = messages_list.copy()
+        current_messages.append({"role": "user", "content": user_message})
+
+        print(current_messages)
 
         # Use the processor to apply the chat template
         prompt = processor.tokenizer.apply_chat_template(
-
+            current_messages,
             tokenize=False,
             add_generation_prompt=True
         )
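A note on the .copy() above: the handler appends the new user turn to a shallow copy, so the caller's conversation history is not mutated as a side effect. A minimal illustration:

# Appending to a shallow copy leaves the original list untouched
history = [{"role": "user", "content": "Hi"}]
current_messages = history.copy()
current_messages.append({"role": "user", "content": "What is in this image?"})
assert len(history) == 1
assert len(current_messages) == 2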
@@ -85,8 +80,10 @@ def generate_response(user_message, messages_list, image_pil, max_new_tokens, to
         thread.start()
 
         # Yield the generated tokens as they become available
+        response = ""
         for new_token in streamer:
-
+            response += new_token
+            yield response
 
     except Exception as e:
         print(f"Error during inference: {e}")
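This change completes the streaming loop: generate() runs in a background thread, and the handler accumulates tokens from the streamer and yields the text so far. The same pattern in isolation, as a sketch (the model, tokenizer, and tokenized inputs are assumed to already exist; max_new_tokens is illustrative, not the app's value):

from threading import Thread
from transformers import TextIteratorStreamer

def stream_generate(model, tokenizer, inputs, max_new_tokens=256):
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens)
    # generate() blocks until it finishes, so it runs in a background thread
    # while this generator consumes decoded tokens as they arrive
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    response = ""
    for new_token in streamer:
        response += new_token
        yield response  # yield the full text so far, matching the app's behaviour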
@@ -103,12 +100,10 @@ def process_chat(user_message, chatbot_display, messages_list, image_pil, max_ne
     # Append user's message to the chatbot display list
     chatbot_display.append((user_message, ""))
 
-    # Initialize the response as an empty string
-    response = ""
-
     # Generate the response using streaming
+    response = ""
    for chunk in generate_response(user_message, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
-        response
+        response = chunk
         # Update the chatbot display with the current response
         chatbot_display[-1] = (user_message, response)
         yield chatbot_display, messages_list, ""
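The loop above assigns response = chunk rather than concatenating because generate_response already yields the accumulated reply; each chunk supersedes the previous one, and += would repeat earlier text. A small standalone illustration:

def producer():
    text = ""
    for piece in ["Hel", "lo", " world"]:
        text += piece
        yield text  # cumulative snapshots: "Hel", "Hello", "Hello world"

latest = ""
for chunk in producer():
    latest = chunk  # keep only the newest snapshot
print(latest)  # -> "Hello world"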
@@ -184,4 +179,4 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch(mcp_server=True)
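The new launch flag also exposes the app over MCP: with a recent Gradio (5.x with the mcp extra installed), mcp_server=True serves the usual web UI plus an MCP endpoint whose tools mirror the app's API functions. A minimal standalone sketch using a hypothetical function, not taken from this app:

import gradio as gr

def letter_counter(word: str, letter: str) -> int:
    """Count how many times letter appears in word."""
    return word.lower().count(letter.lower())

demo = gr.Interface(fn=letter_counter, inputs=["text", "text"], outputs="number")

if __name__ == "__main__":
    # Serves the web UI and, additionally, an MCP server that agents can call
    demo.launch(mcp_server=True)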