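"""Gradio demo for nvidia/Llama-Nemotron-Nano-VL-8B-V1.

Designed for a Hugging Face ZeroGPU Space: the model is loaded once on CPU and
moved to the GPU only for the duration of each request. The interface offers
three tabs: text-only chat, single-image Q&A, and two-image comparison.
"""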
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
from PIL import Image
import gc
import os
import spaces
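# `spaces` is the Hugging Face Spaces helper package; the @spaces.GPU(duration=60)
# decorators below request a ZeroGPU allocation for up to 60 seconds per call.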
# Model configuration
MODEL_PATH = "nvidia/Llama-Nemotron-Nano-VL-8B-V1"
# Load model globally
print("Loading model...")
model = AutoModel.from_pretrained(
MODEL_PATH,
torch_dtype=torch.bfloat16,
low_cpu_mem_usage=True,
trust_remote_code=True,
).eval()
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
image_processor = AutoImageProcessor.from_pretrained(
MODEL_PATH,
trust_remote_code=True
)
print("Model loaded successfully!")
def move_to_device(obj, device):
"""Recursively move tensors to device"""
if torch.is_tensor(obj):
return obj.to(device)
elif isinstance(obj, dict):
return {k: move_to_device(v, device) for k, v in obj.items()}
elif isinstance(obj, list):
return [move_to_device(v, device) for v in obj]
elif isinstance(obj, tuple):
return tuple(move_to_device(v, device) for v in obj)
elif hasattr(obj, 'to'):
return obj.to(device)
else:
return obj
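# Objects that are not plain dicts/lists/tuples but expose a .to() method
# (e.g. a transformers BatchFeature returned by the image processor) are
# handled by the final hasattr(obj, 'to') branch above.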
@spaces.GPU(duration=60)
def chat_text_only(message):
try:
device = "cuda"
# Move entire model to GPU
model.to(device)
generation_config = dict(
max_new_tokens=512,
do_sample=True,
temperature=0.7,
eos_token_id=tokenizer.eos_token_id
)
        # model.chat tokenizes the prompt internally, so no separate tokenization step is needed here
# Generate
with torch.no_grad():
response, _ = model.chat(
tokenizer,
None,
message,
generation_config,
history=None,
return_history=True
)
# Move model back to CPU
model.to("cpu")
torch.cuda.empty_cache()
gc.collect()
return response
except Exception as e:
# Ensure model is back on CPU even if error occurs
model.to("cpu")
torch.cuda.empty_cache()
gc.collect()
return f"Error: {str(e)}"
@spaces.GPU(duration=60)
def chat_with_image(image, message):
if image is None:
return "Please upload an image."
try:
device = "cuda"
# Move entire model to GPU
model.to(device)
generation_config = dict(
max_new_tokens=512,
do_sample=True,
temperature=0.7,
eos_token_id=tokenizer.eos_token_id
)
# Process image
image_features = image_processor(image)
# Move all image features to GPU
image_features = move_to_device(image_features, device)
# Add image token to message if not present
if "<image>" not in message:
message = f"<image>\n{message}"
# Generate
with torch.no_grad():
response = model.chat(
tokenizer=tokenizer,
question=message,
generation_config=generation_config,
**image_features
)
# Move model back to CPU
model.to("cpu")
torch.cuda.empty_cache()
gc.collect()
return response
except Exception as e:
# Ensure model is back on CPU even if error occurs
model.to("cpu")
torch.cuda.empty_cache()
gc.collect()
return f"Error: {str(e)}"
@spaces.GPU(duration=60)
def chat_with_two_images(image1, image2, message):
if image1 is None or image2 is None:
return "Please upload both images."
try:
device = "cuda"
# Move entire model to GPU
model.to(device)
generation_config = dict(
max_new_tokens=512,
do_sample=True,
temperature=0.7,
eos_token_id=tokenizer.eos_token_id
)
# Process both images
image_features = image_processor([image1, image2])
# Move all image features to GPU
image_features = move_to_device(image_features, device)
# Format message for two images
if "<image-1>" not in message and "<image-2>" not in message:
message = f"<image-1>: <image>\n<image-2>: <image>\n{message}"
# Generate
with torch.no_grad():
response = model.chat(
tokenizer=tokenizer,
question=message,
generation_config=generation_config,
**image_features
)
# Move model back to CPU
model.to("cpu")
torch.cuda.empty_cache()
gc.collect()
return response
except Exception as e:
# Ensure model is back on CPU even if error occurs
model.to("cpu")
torch.cuda.empty_cache()
gc.collect()
return f"Error: {str(e)}"
# Create Gradio interface
def create_interface():
with gr.Blocks(title="Llama Nemotron Nano VL 8B", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🦙 Llama Nemotron Nano VL 8B Vision-Language Model")
gr.Markdown("Chat with a powerful vision-language model that can understand both text and images!")
with gr.Tabs():
# Text-only chat tab
with gr.TabItem("💬 Text Chat"):
gr.Markdown("### Chat with the model using text only")
with gr.Row():
with gr.Column():
text_input = gr.Textbox(
label="Your message",
placeholder="Ask me anything...",
lines=3
)
text_submit = gr.Button("Send", variant="primary")
with gr.Column():
text_output = gr.Textbox(
label="Model Response",
lines=10,
max_lines=20
)
text_submit.click(
chat_text_only,
inputs=[text_input],
outputs=[text_output]
)
# Example questions
gr.Examples(
examples=[
["What is artificial intelligence?"],
["Explain quantum computing in simple terms."],
["What happened in 1969?"],
["Write a short story about a robot."]
],
inputs=[text_input]
)
# Single image chat tab
with gr.TabItem("🖼️ Image + Text Chat"):
gr.Markdown("### Upload an image and ask questions about it")
with gr.Row():
with gr.Column():
image_input = gr.Image(
label="Upload Image",
type="pil"
)
image_text_input = gr.Textbox(
label="Your question about the image",
placeholder="What do you see in this image?",
lines=3
)
image_submit = gr.Button("Analyze", variant="primary")
with gr.Column():
image_output = gr.Textbox(
label="Model Response",
lines=10,
max_lines=20
)
image_submit.click(
chat_with_image,
inputs=[image_input, image_text_input],
outputs=[image_output]
)
# Example prompts
gr.Examples(
examples=[
["Describe what you see in this image."],
["What objects are in this image?"],
["Extract any text from this image."],
["What is the main subject of this image?"]
],
inputs=[image_text_input]
)
# Two images comparison tab
with gr.TabItem("🖼️🖼️ Compare Two Images"):
gr.Markdown("### Upload two images and ask the model to compare them")
with gr.Row():
with gr.Column():
image1_input = gr.Image(
label="First Image",
type="pil"
)
image2_input = gr.Image(
label="Second Image",
type="pil"
)
two_images_text_input = gr.Textbox(
label="Your question about both images",
placeholder="Compare these two images...",
lines=3
)
two_images_submit = gr.Button("Compare", variant="primary")
with gr.Column():
two_images_output = gr.Textbox(
label="Model Response",
lines=10,
max_lines=20
)
two_images_submit.click(
chat_with_two_images,
inputs=[image1_input, image2_input, two_images_text_input],
outputs=[two_images_output]
)
# Example prompts
gr.Examples(
examples=[
["What are the main differences between these two images?"],
["Describe both images briefly."],
["Which image is more colorful?"],
["Compare the subjects in these images."]
],
inputs=[two_images_text_input]
)
# Footer
gr.Markdown("---")
gr.Markdown("⚡ Powered by NVIDIA Llama Nemotron Nano VL 8B")
return demo
# Create and launch the interface
if __name__ == "__main__":
demo = create_interface()
    demo.queue()  # Enable queuing for ZeroGPU
demo.launch(
server_name="0.0.0.0",
server_port=7860,
ssr_mode=False
)