import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
import spaces
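# Note: `spaces` is the Hugging Face ZeroGPU SDK. On "Zero" hardware, GPU-bound
# functions must be wrapped with the @spaces.GPU decorator (applied to `respond`
# below) so that a GPU is attached for the duration of each call.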
# Initialize model and tokenizer
torch.manual_seed(100)
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-V-4_5',
    trust_remote_code=True,
    attn_implementation='sdpa',
    torch_dtype=torch.bfloat16
)
model = model.eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(
    'openbmb/MiniCPM-V-4_5',
    trust_remote_code=True
)
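# The model's `chat` method (loaded via trust_remote_code) takes a list of
# message dicts whose "content" is a list mixing PIL images and strings, e.g.
# [{"role": "user", "content": [<PIL.Image>, "What is in this picture?"]}].
# `respond` below builds exactly that structure from the Gradio chat history.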
@spaces.GPU  # allocate a GPU for this call when the Space runs on ZeroGPU
def respond(message, history, enable_thinking):
    """
    Process the user message and generate a response from the model.
    """
    # Build the conversation history in the format expected by the model
    msgs = []

    # Add previous conversation turns
    for user_msg, assistant_msg in history:
        # Parse the user turn for images and text
        if isinstance(user_msg, (tuple, list)):
            # The user turn contains an image (and optionally text); depending on
            # the Gradio version it may round-trip as a tuple or a list
            img_path, text = user_msg
            img = Image.open(img_path).convert('RGB')
            user_content = [img, text] if text else [img]
        else:
            # Text-only turn
            user_content = [user_msg]
        msgs.append({"role": "user", "content": user_content})
        if assistant_msg:
            msgs.append({"role": "assistant", "content": [assistant_msg]})

    # Add the current message
    current_content = []
    if isinstance(message, dict):
        # Multimodal input from gr.MultimodalTextbox: {"text": ..., "files": [...]}
        if message.get("files"):
            for file_path in message["files"]:
                img = Image.open(file_path).convert('RGB')
                current_content.append(img)
        if message.get("text"):
            current_content.append(message["text"])
    else:
        # Text-only input
        current_content = [message]
    msgs.append({"role": "user", "content": current_content})

    # Generate the response
    try:
        answer = model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            enable_thinking=enable_thinking
        )
        return answer
    except Exception as e:
        return f"Error: {str(e)}"
# Create Gradio interface
with gr.Blocks(title="MiniCPM-V Chatbot") as demo:
    gr.Markdown(
        """
        # 🤖 MiniCPM-V Multimodal Chatbot
        Upload images and ask questions about them, or have a text conversation.
        The model supports multi-turn conversations with context memory.
        """
    )
    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                container=True,
                type="tuples"
            )
            with gr.Row():
                msg = gr.MultimodalTextbox(
                    interactive=True,
                    file_types=["image"],
                    placeholder="Type a message or upload an image...",
                    show_label=False,
                    container=False
                )
            with gr.Row():
                clear = gr.Button("🗑️ Clear", size="sm")
                submit = gr.Button("📤 Send", variant="primary", size="sm")
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            enable_thinking = gr.Checkbox(
                label="Enable Thinking Mode",
                value=False,
                info="Enable the model's thinking process"
            )
            gr.Markdown(
                """
                ### Examples
                - Upload an image and ask "What is in this picture?"
                - Ask "What are the main objects visible?"
                - Follow up with "What should I pay attention to here?"
                """
            )
    # Handle message submission
    def user_submit(message, history, enable_thinking):
        # Format the user message for display in the chatbot
        if isinstance(message, dict) and message.get("files"):
            # If a file was uploaded, use the (file_path, text) tuple format for display
            user_msg = (message["files"][0], message.get("text", ""))
        else:
            user_msg = message.get("text", "") if isinstance(message, dict) else message

        # Add the user message to the display history
        history = history + [(user_msg, None)]

        # Generate the response from the earlier turns plus the raw current message
        response = respond(message, history[:-1], enable_thinking)

        # Fill in the assistant side of the last turn
        history[-1] = (history[-1][0], response)

        # Clear the multimodal textbox and return the updated history
        return {"text": "", "files": []}, history
    # Event handlers
    msg.submit(
        user_submit,
        inputs=[msg, chatbot, enable_thinking],
        outputs=[msg, chatbot]
    )
    submit.click(
        user_submit,
        inputs=[msg, chatbot, enable_thinking],
        outputs=[msg, chatbot]
    )
    clear.click(
        lambda: (None, []),
        outputs=[msg, chatbot]
    )
if __name__ == "__main__":
    demo.launch(share=True)