# MiniCPM-V-4_5 / app.py
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
import spaces
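# `spaces` provides the @spaces.GPU decorator, which requests a GPU for the
# duration of a call when the app runs on a Hugging Face ZeroGPU Space.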
# Initialize the model and tokenizer
torch.manual_seed(100)
model = AutoModel.from_pretrained(
    'openbmb/MiniCPM-V-4_5',
    trust_remote_code=True,
    attn_implementation='sdpa',
    torch_dtype=torch.bfloat16
)
model = model.eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(
    'openbmb/MiniCPM-V-4_5',
    trust_remote_code=True
)
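# For reference, a minimal single-turn call to MiniCPM-V's `model.chat`, mirroring
# the usage in `respond` below (assumes a local image file named 'example.jpg'):
#
#     img = Image.open('example.jpg').convert('RGB')
#     answer = model.chat(
#         msgs=[{"role": "user", "content": [img, "What is in this picture?"]}],
#         tokenizer=tokenizer,
#     )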
@spaces.GPU(duration=120)
def respond(message, history, enable_thinking):
    """
    Process the user message and generate a response.
    """
    # Build the conversation history in the format expected by the model
    msgs = []
    # Add previous conversation turns
    for user_msg, assistant_msg in history:
        # Parse the user message for images and text
        if isinstance(user_msg, tuple):
            # The user message contains an image path and optional text
            img_path, text = user_msg
            img = Image.open(img_path).convert('RGB')
            user_content = [img, text] if text else [img]
        else:
            # Text-only message
            user_content = [user_msg]
        msgs.append({"role": "user", "content": user_content})
        if assistant_msg:
            msgs.append({"role": "assistant", "content": [assistant_msg]})
    # Add the current message
    current_content = []
    if isinstance(message, dict):
        # Multimodal input: collect any uploaded images, then the text
        if message.get("files"):
            for file_path in message["files"]:
                img = Image.open(file_path).convert('RGB')
                current_content.append(img)
        if message.get("text"):
            current_content.append(message["text"])
    else:
        # Text-only input
        current_content = [message]
    msgs.append({"role": "user", "content": current_content})
    # Generate the response
    try:
        answer = model.chat(
            msgs=msgs,
            tokenizer=tokenizer,
            enable_thinking=enable_thinking
        )
        return answer
    except Exception as e:
        return f"Error: {str(e)}"
# Create the Gradio interface
with gr.Blocks(title="MiniCPM-V Chatbot") as demo:
    gr.Markdown(
        """
        # 🤖 MiniCPM-V Multimodal Chatbot
        Upload images and ask questions about them, or have a text conversation.
        The model supports multi-turn conversations with context memory.
        """
    )
    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                container=True,
                type="tuples"
            )
            with gr.Row():
                msg = gr.MultimodalTextbox(
                    interactive=True,
                    file_types=["image"],
                    placeholder="Type a message or upload an image...",
                    show_label=False,
                    container=False
                )
            with gr.Row():
                clear = gr.Button("🗑️ Clear", size="sm")
                submit = gr.Button("📤 Send", variant="primary", size="sm")
        with gr.Column(scale=1):
            gr.Markdown("### Settings")
            enable_thinking = gr.Checkbox(
                label="Enable Thinking Mode",
                value=False,
                info="Enable the model's thinking process"
            )
            gr.Markdown(
                """
                ### Examples
                - Upload an image and ask "What is in this picture?"
                - Ask "What are the main objects visible?"
                - Follow up with "What should I pay attention to here?"
                """
            )
    # Handle message submission
    def user_submit(message, history, enable_thinking):
        # Format the user message for chatbot display
        if isinstance(message, dict) and message.get("files"):
            # With a file attached, use the (file_path, text) tuple format
            user_msg = (message["files"][0], message.get("text", ""))
        else:
            user_msg = message.get("text", "") if isinstance(message, dict) else message
        # Add the user message to the history
        history = history + [(user_msg, None)]
        # Generate a response from all turns before the new one
        response = respond(message, history[:-1], enable_thinking)
        # Fill in the assistant slot of the newest turn
        history[-1] = (history[-1][0], response)
        # Return None to clear the multimodal textbox, plus the updated history
        return None, history
    # Event handlers
    msg.submit(
        user_submit,
        inputs=[msg, chatbot, enable_thinking],
        outputs=[msg, chatbot]
    )
    submit.click(
        user_submit,
        inputs=[msg, chatbot, enable_thinking],
        outputs=[msg, chatbot]
    )
    clear.click(
        lambda: (None, []),
        outputs=[msg, chatbot]
    )

if __name__ == "__main__":
    demo.launch(share=True)