KingNish committed
Commit 698861e · verified · 1 Parent(s): c547944

Update app.py

Files changed (1): app.py (+84, -29)
app.py CHANGED
@@ -1,10 +1,12 @@
 import torch
-from transformers import AutoProcessor, AutoModelForCausalLM, GenerationConfig
+from transformers import AutoProcessor, AutoModelForCausalLM, GenerationConfig, TextIteratorStreamer
 from PIL import Image
 import gradio as gr
 import spaces
+import threading
 
 # --- 1. Model and Processor Setup ---
+
 model_id = "bharatgenai/patram-7b-instruct"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"Using device: {device}")
@@ -37,15 +39,15 @@ processor.tokenizer.chat_template = chat_template
 
 # --- 2. Gradio Chatbot Logic ---
 @spaces.GPU
-def process_chat(user_message, chatbot_display, messages_list, image_pil):
-    if image_pil is None:
-        chatbot_display.append((user_message, "Please upload an image first to start the conversation."))
-        return chatbot_display, messages_list, ""
-
-    messages_list.append({"role": "user", "content": user_message})
-    chatbot_display.append((user_message, None))
-
+def generate_response(user_message, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
+    """
+    Generate a response from the model using streaming.
+    """
     try:
+        # Append user's message to the conversation history for the model
+        messages_list.append({"role": "user", "content": user_message})
+
+        # Use the processor to apply the chat template
         prompt = processor.tokenizer.apply_chat_template(
             messages_list,
             tokenize=False,
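
Note on the `apply_chat_template` call both versions share: it renders the accumulated `messages_list` into the single prompt string the model expects, using the `chat_template` the app assigns to `processor.tokenizer` earlier in the file (the hunk is cut off before the call's remaining arguments, and the template string itself is not shown in this diff). A minimal self-contained sketch of what the call does, with a hypothetical template:

from transformers import AutoTokenizer

# Hypothetical stand-in for processor.tokenizer; the app sets its own
# chat_template string, which this diff does not show.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.chat_template = (
    "{% for m in messages %}{{ m['role'] }}: {{ m['content'] }}\n{% endfor %}"
    "{% if add_generation_prompt %}assistant:{% endif %}"
)

messages = [{"role": "user", "content": "What is shown in this image?"}]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,             # return a string instead of token IDs
    add_generation_prompt=True, # append the assistant header so the model answers next
)
print(prompt)
# user: What is shown in this image?
# assistant:
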
@@ -55,44 +57,87 @@ def process_chat(user_message, chatbot_display, messages_list, image_pil):
         # Preprocess image and the entire formatted prompt
         inputs = processor.process(images=[image_pil], text=prompt)
         inputs = {k: v.to(device).unsqueeze(0) for k, v in inputs.items()}
-
-        # Ensure all tensors are in the same dtype
         inputs = {k: v.half() if v.dtype == torch.float32 else v for k, v in inputs.items()}
 
+        # Initialize the streamer
+        streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
+
+        # Define generation config
+        generation_config = GenerationConfig(
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            top_p=top_p,
+            top_k=top_k,
+            temperature=temperature,
+            stop_strings="<|endoftext|>"
+        )
+
         # Generate output using model's specific method
-        output = model.generate_from_batch(
-            inputs,
-            GenerationConfig(max_new_tokens=512, do_sample=True, top_p=0.9, temperature=0.6, stop_strings="<|endoftext|>"),
+        generate_kwargs = dict(
+            **inputs,
+            streamer=streamer,
+            generation_config=generation_config,
             tokenizer=processor.tokenizer
         )
 
-        generated_tokens = output[0, inputs['input_ids'].size(1):]
-        response = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
+        # Start the generation in a separate thread to allow streaming
+        thread = threading.Thread(target=model.generate_from_batch, kwargs=generate_kwargs)
+        thread.start()
 
-        messages_list.append({"role": "assistant", "content": response})
-        chatbot_display[-1] = (user_message, response)
+        # Yield the generated tokens as they become available
+        for new_token in streamer:
+            yield new_token
 
     except Exception as e:
         print(f"Error during inference: {e}")
-        error_message = f"Sorry, an error occurred during processing: {e}"
-        chatbot_display[-1] = (user_message, error_message)
+        yield f"Sorry, an error occurred during processing: {e}"
 
-    return chatbot_display, messages_list, ""
+def process_chat(user_message, chatbot_display, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
+    """
+    This function handles the chat logic for a single turn with streaming.
+    """
+    if image_pil is None:
+        chatbot_display.append((user_message, "Please upload an image first to start the conversation."))
+        return chatbot_display, messages_list, ""
+
+    # Append user's message to the chatbot display list
+    chatbot_display.append((user_message, ""))
+
+    # Initialize the response as an empty string
+    response = ""
+
+    # Generate the response using streaming
+    for chunk in generate_response(user_message, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
+        response += chunk
+        # Update the chatbot display with the current response
+        chatbot_display[-1] = (user_message, response)
+        yield chatbot_display, messages_list, ""
 
-def clear_chat(chatbot_display, messages_list, image_input):
+    # Append assistant's response to the conversation history
+    messages_list.append({"role": "assistant", "content": response})
+
+def clear_chat():
     """Resets the chat, history, and image."""
-    return [], [], None, "Type your question here..."
+    return [], [], None, "", 256, 0.9, 50, 0.6
 
 # --- 3. Gradio Interface Definition ---
 with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
     gr.Markdown("# 🤖 Patram-7B-Instruct Chatbot")
     gr.Markdown("Upload an image and ask questions about it. The chatbot will remember the conversation context.")
 
+    # State variables to hold conversation history and image
     messages_list = gr.State([])
+    image_input = gr.State(None)
+
     with gr.Row():
         with gr.Column(scale=1):
-            image_input = gr.Image(type="pil", label="Upload Image")
+            image_input_render = gr.Image(type="pil", label="Upload Image")
             clear_btn = gr.Button("🗑️ Clear Chat and Image")
+            with gr.Accordion("Generation Parameters", open=False):
+                max_new_tokens = gr.Slider(minimum=32, maximum=512, value=256, step=32, label="Max New Tokens")
+                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="Top-p (Nucleus Sampling)")
+                top_k = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-k")
+                temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.6, step=0.1, label="Temperature")
 
         with gr.Column(scale=2):
             chatbot_display = gr.Chatbot(
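
The hunk above is the core of the commit: the single blocking `generate_from_batch` call is replaced by the standard transformers streaming pattern, in which generation runs on a worker thread and a `TextIteratorStreamer` hands decoded text chunks back to the consuming loop as they are produced. A minimal self-contained sketch of that pattern with a generic causal LM (plain `model.generate` here; Patram's `generate_from_batch` wrapper is specific to this model):

import threading
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Streaming generation works by", return_tensors="pt")
# skip_prompt=True suppresses the echoed input; skip_special_tokens cleans the text
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# generate() blocks until it finishes, so it runs on a background thread while
# the main thread drains the streamer's queue chunk by chunk
thread = threading.Thread(
    target=model.generate,
    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=40),
)
thread.start()
for chunk in streamer:
    print(chunk, end="", flush=True)
thread.join()
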
@@ -110,23 +155,33 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutra
110
  submit_btn = gr.Button("Send", variant="primary", scale=1, min_width=0)
111
 
112
  # --- Event Listeners ---
 
 
113
  submit_action = user_textbox.submit(
114
  fn=process_chat,
115
- inputs=[user_textbox, chatbot_display, messages_list, image_input],
116
  outputs=[chatbot_display, messages_list, user_textbox]
117
  )
118
  submit_btn.click(
119
  fn=process_chat,
120
- inputs=[user_textbox, chatbot_display, messages_list, image_input],
121
  outputs=[chatbot_display, messages_list, user_textbox]
122
  )
123
 
 
124
  clear_btn.click(
125
- fn=lambda: ([], [], None, ""),
126
  inputs=[],
127
- outputs=[chatbot_display, messages_list, image_input, user_textbox],
128
  queue=False
129
  )
130
 
 
 
 
 
 
 
 
131
  if __name__ == "__main__":
132
- demo.launch(mcp_server=True)
 
1
  import torch
2
+ from transformers import AutoProcessor, AutoModelForCausalLM, GenerationConfig, TextIteratorStreamer
3
  from PIL import Image
4
  import gradio as gr
5
  import spaces
6
+ import threading
7
 
8
  # --- 1. Model and Processor Setup ---
9
+
10
  model_id = "bharatgenai/patram-7b-instruct"
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
  print(f"Using device: {device}")
 
39
 
40
  # --- 2. Gradio Chatbot Logic ---
41
  @spaces.GPU
42
+ def generate_response(user_message, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
43
+ """
44
+ Generate a response from the model using streaming.
45
+ """
 
 
 
 
46
  try:
47
+ # Append user's message to the conversation history for the model
48
+ messages_list.append({"role": "user", "content": user_message})
49
+
50
+ # Use the processor to apply the chat template
51
  prompt = processor.tokenizer.apply_chat_template(
52
  messages_list,
53
  tokenize=False,
 
57
  # Preprocess image and the entire formatted prompt
58
  inputs = processor.process(images=[image_pil], text=prompt)
59
  inputs = {k: v.to(device).unsqueeze(0) for k, v in inputs.items()}
 
 
60
  inputs = {k: v.half() if v.dtype == torch.float32 else v for k, v in inputs.items()}
61
 
62
+ # Initialize the streamer
63
+ streamer = TextIteratorStreamer(processor.tokenizer, skip_prompt=True, skip_special_tokens=True)
64
+
65
+ # Define generation config
66
+ generation_config = GenerationConfig(
67
+ max_new_tokens=max_new_tokens,
68
+ do_sample=True,
69
+ top_p=top_p,
70
+ top_k=top_k,
71
+ temperature=temperature,
72
+ stop_strings="<|endoftext|>"
73
+ )
74
+
75
  # Generate output using model's specific method
76
+ generate_kwargs = dict(
77
+ **inputs,
78
+ streamer=streamer,
79
+ generation_config=generation_config,
80
  tokenizer=processor.tokenizer
81
  )
82
 
83
+ # Start the generation in a separate thread to allow streaming
84
+ thread = threading.Thread(target=model.generate_from_batch, kwargs=generate_kwargs)
85
+ thread.start()
86
 
87
+ # Yield the generated tokens as they become available
88
+ for new_token in streamer:
89
+ yield new_token
90
 
91
  except Exception as e:
92
  print(f"Error during inference: {e}")
93
+ yield f"Sorry, an error occurred during processing: {e}"
 
94
 
95
+ def process_chat(user_message, chatbot_display, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
96
+ """
97
+ This function handles the chat logic for a single turn with streaming.
98
+ """
99
+ if image_pil is None:
100
+ chatbot_display.append((user_message, "Please upload an image first to start the conversation."))
101
+ return chatbot_display, messages_list, ""
102
+
103
+ # Append user's message to the chatbot display list
104
+ chatbot_display.append((user_message, ""))
105
+
106
+ # Initialize the response as an empty string
107
+ response = ""
108
+
109
+ # Generate the response using streaming
110
+ for chunk in generate_response(user_message, messages_list, image_pil, max_new_tokens, top_p, top_k, temperature):
111
+ response += chunk
112
+ # Update the chatbot display with the current response
113
+ chatbot_display[-1] = (user_message, response)
114
+ yield chatbot_display, messages_list, ""
115
 
116
+ # Append assistant's response to the conversation history
117
+ messages_list.append({"role": "assistant", "content": response})
118
+
119
+ def clear_chat():
120
  """Resets the chat, history, and image."""
121
+ return [], [], None, "", 256, 0.9, 50, 0.6
122
 
123
  # --- 3. Gradio Interface Definition ---
124
  with gr.Blocks(theme=gr.themes.Default(primary_hue="blue", secondary_hue="neutral")) as demo:
125
  gr.Markdown("# 🤖 Patram-7B-Instruct Chatbot")
126
  gr.Markdown("Upload an image and ask questions about it. The chatbot will remember the conversation context.")
127
 
128
+ # State variables to hold conversation history and image
129
  messages_list = gr.State([])
130
+ image_input = gr.State(None)
131
+
132
  with gr.Row():
133
  with gr.Column(scale=1):
134
+ image_input_render = gr.Image(type="pil", label="Upload Image")
135
  clear_btn = gr.Button("🗑️ Clear Chat and Image")
136
+ with gr.Accordion("Generation Parameters", open=False):
137
+ max_new_tokens = gr.Slider(minimum=32, maximum=512, value=256, step=32, label="Max New Tokens")
138
+ top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="Top-p (Nucleus Sampling)")
139
+ top_k = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-k")
140
+ temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.6, step=0.1, label="Temperature")
141
 
142
  with gr.Column(scale=2):
143
  chatbot_display = gr.Chatbot(
 
155
  submit_btn = gr.Button("Send", variant="primary", scale=1, min_width=0)
156
 
157
  # --- Event Listeners ---
158
+
159
+ # Define the action for submitting a message (via button or enter key)
160
  submit_action = user_textbox.submit(
161
  fn=process_chat,
162
+ inputs=[user_textbox, chatbot_display, messages_list, image_input, max_new_tokens, top_p, top_k, temperature],
163
  outputs=[chatbot_display, messages_list, user_textbox]
164
  )
165
  submit_btn.click(
166
  fn=process_chat,
167
+ inputs=[user_textbox, chatbot_display, messages_list, image_input, max_new_tokens, top_p, top_k, temperature],
168
  outputs=[chatbot_display, messages_list, user_textbox]
169
  )
170
 
171
+ # Define the action for the clear button
172
  clear_btn.click(
173
+ fn=clear_chat,
174
  inputs=[],
175
+ outputs=[chatbot_display, messages_list, image_input_render, user_textbox, max_new_tokens, top_p, top_k, temperature],
176
  queue=False
177
  )
178
 
179
+ # Update the image state when a new image is uploaded
180
+ image_input_render.change(
181
+ fn=lambda x: x,
182
+ inputs=image_input_render,
183
+ outputs=image_input
184
+ )
185
+
186
  if __name__ == "__main__":
187
+ demo.launch()
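
On the Gradio side, streaming works because `process_chat` is now a generator: Gradio treats each `yield` from an event handler as an incremental update to the output components, which is what makes the chatbot text grow in place in the browser. A toy sketch of that mechanism, independent of any model (component and function names here are illustrative, not the app's):

import time
import gradio as gr

def stream_reply(message, history):
    # Add the new turn with an empty reply, then grow it yield by yield
    history = history + [(message, "")]
    reply = ""
    for word in "each yield repaints the chatbot".split():
        reply += word + " "
        history[-1] = (message, reply)
        time.sleep(0.2)    # stand-in for per-token latency
        yield history, ""  # update the Chatbot, keep the textbox cleared

with gr.Blocks() as demo:
    chat = gr.Chatbot()
    box = gr.Textbox(placeholder="Type your question here...")
    box.submit(stream_reply, inputs=[box, chat], outputs=[chat, box])

if __name__ == "__main__":
    demo.launch()

The same event-wiring idea drives the final hunk's `image_input_render.change` listener, which simply mirrors the uploaded image into a `gr.State` so `process_chat` reads it from per-session state rather than from the rendered component.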