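"""Gradio demo for nvidia/Llama-Nemotron-Nano-VL-8B-V1 on a Hugging Face Zero GPU Space.

Three tabs: text-only chat, single-image Q&A, and two-image comparison.
The model lives on CPU between requests and is moved to CUDA inside
@spaces.GPU-decorated handlers.
"""
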
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModel, AutoImageProcessor
from PIL import Image
import gc
import spaces  # Hugging Face Zero GPU helper; provides the @spaces.GPU decorator

# Model configuration
MODEL_PATH = "nvidia/Llama-Nemotron-Nano-VL-8B-V1"

# Load the model once at import time. On Zero GPU Spaces the process starts
# without a GPU, so the model stays on CPU until a @spaces.GPU call moves it.
print("Loading model...")
model = AutoModel.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).eval()

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
image_processor = AutoImageProcessor.from_pretrained(
    MODEL_PATH, 
    trust_remote_code=True
)
print("Model loaded successfully!")

def move_to_device(obj, device):
    """Recursively move tensors to device"""
    if torch.is_tensor(obj):
        return obj.to(device)
    elif isinstance(obj, dict):
        return {k: move_to_device(v, device) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [move_to_device(v, device) for v in obj]
    elif isinstance(obj, tuple):
        return tuple(move_to_device(v, device) for v in obj)
    elif hasattr(obj, 'to'):
        return obj.to(device)
    else:
        return obj
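
# A quick sketch (hypothetical values) of what move_to_device handles: nested
# containers are traversed, tensors are moved, and non-tensor leaves such as
# ints pass through unchanged.
#   feats = {"pixel_values": torch.zeros(1, 3, 224, 224), "sizes": [(224, 224)]}
#   feats = move_to_device(feats, "cuda")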

@spaces.GPU(duration=60)
def chat_text_only(message):
    """Text-only chat; model.chat() tokenizes the prompt itself."""
    try:
        device = "cuda"
        
        # Move entire model to GPU for this request
        model.to(device)
        
        generation_config = dict(
            max_new_tokens=512, 
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id
        )
        
        # Generate; the image argument is None for text-only chat
        with torch.no_grad():
            response, _ = model.chat(
                tokenizer, 
                None, 
                message, 
                generation_config, 
                history=None, 
                return_history=True
            )
        
        return response
        
    except Exception as e:
        return f"Error: {str(e)}"
        
    finally:
        # Always move the model back to CPU and free GPU memory,
        # whether generation succeeded or raised.
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()

@spaces.GPU(duration=60)
def chat_with_image(image, message):
    """Single-image chat; a <image> placeholder marks where the image is inserted."""
    if image is None:
        return "Please upload an image."
    
    try:
        device = "cuda"
        
        # Move entire model to GPU for this request
        model.to(device)
        
        generation_config = dict(
            max_new_tokens=512, 
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id
        )
        
        # Preprocess the image, then move the resulting features to the GPU
        image_features = image_processor(image)
        image_features = move_to_device(image_features, device)
        
        # Prepend the image token if the user didn't place it explicitly
        if "<image>" not in message:
            message = f"<image>\n{message}"
        
        # Generate
        with torch.no_grad():
            response = model.chat(
                tokenizer=tokenizer, 
                question=message, 
                generation_config=generation_config,
                **image_features
            )
        
        return response
        
    except Exception as e:
        return f"Error: {str(e)}"
        
    finally:
        # Always move the model back to CPU and free GPU memory,
        # whether generation succeeded or raised.
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()

@spaces.GPU(duration=60)
def chat_with_two_images(image1, image2, message):
    """Two-image chat; each <image> placeholder is bound to one uploaded image."""
    if image1 is None or image2 is None:
        return "Please upload both images."
    
    try:
        device = "cuda"
        
        # Move entire model to GPU for this request
        model.to(device)
        
        generation_config = dict(
            max_new_tokens=512, 
            do_sample=True,
            temperature=0.7,
            eos_token_id=tokenizer.eos_token_id
        )
        
        # Preprocess both images in one batch, then move the features to the GPU
        image_features = image_processor([image1, image2])
        image_features = move_to_device(image_features, device)
        
        # Label the two images unless the user formatted the prompt themselves
        if "<image-1>" not in message and "<image-2>" not in message:
            message = f"<image-1>: <image>\n<image-2>: <image>\n{message}"
        
        # Generate
        with torch.no_grad():
            response = model.chat(
                tokenizer=tokenizer, 
                question=message, 
                generation_config=generation_config,
                **image_features
            )
        
        return response
        
    except Exception as e:
        return f"Error: {str(e)}"
        
    finally:
        # Always move the model back to CPU and free GPU memory,
        # whether generation succeeded or raised.
        model.to("cpu")
        torch.cuda.empty_cache()
        gc.collect()

# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Llama Nemotron Nano VL 8B", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🦙 Llama Nemotron Nano VL 8B Vision-Language Model")
        gr.Markdown("Chat with a powerful vision-language model that can understand both text and images!")
        
        with gr.Tabs():
            # Text-only chat tab
            with gr.TabItem("💬 Text Chat"):
                gr.Markdown("### Chat with the model using text only")
                
                with gr.Row():
                    with gr.Column():
                        text_input = gr.Textbox(
                            label="Your message",
                            placeholder="Ask me anything...",
                            lines=3
                        )
                        text_submit = gr.Button("Send", variant="primary")
                    
                    with gr.Column():
                        text_output = gr.Textbox(
                            label="Model Response",
                            lines=10,
                            max_lines=20
                        )
                
                text_submit.click(
                    chat_text_only,
                    inputs=[text_input],
                    outputs=[text_output]
                )
                
                # Example questions
                gr.Examples(
                    examples=[
                        ["What is artificial intelligence?"],
                        ["Explain quantum computing in simple terms."],
                        ["What happened in 1969?"],
                        ["Write a short story about a robot."]
                    ],
                    inputs=[text_input]
                )
            
            # Single image chat tab
            with gr.TabItem("🖼️ Image + Text Chat"):
                gr.Markdown("### Upload an image and ask questions about it")
                
                with gr.Row():
                    with gr.Column():
                        image_input = gr.Image(
                            label="Upload Image",
                            type="pil"
                        )
                        image_text_input = gr.Textbox(
                            label="Your question about the image",
                            placeholder="What do you see in this image?",
                            lines=3
                        )
                        image_submit = gr.Button("Analyze", variant="primary")
                    
                    with gr.Column():
                        image_output = gr.Textbox(
                            label="Model Response",
                            lines=10,
                            max_lines=20
                        )
                
                image_submit.click(
                    chat_with_image,
                    inputs=[image_input, image_text_input],
                    outputs=[image_output]
                )
                
                # Example prompts
                gr.Examples(
                    examples=[
                        ["Describe what you see in this image."],
                        ["What objects are in this image?"],
                        ["Extract any text from this image."],
                        ["What is the main subject of this image?"]
                    ],
                    inputs=[image_text_input]
                )
            
            # Two images comparison tab
            with gr.TabItem("🖼️🖼️ Compare Two Images"):
                gr.Markdown("### Upload two images and ask the model to compare them")
                
                with gr.Row():
                    with gr.Column():
                        image1_input = gr.Image(
                            label="First Image",
                            type="pil"
                        )
                        image2_input = gr.Image(
                            label="Second Image", 
                            type="pil"
                        )
                        two_images_text_input = gr.Textbox(
                            label="Your question about both images",
                            placeholder="Compare these two images...",
                            lines=3
                        )
                        two_images_submit = gr.Button("Compare", variant="primary")
                    
                    with gr.Column():
                        two_images_output = gr.Textbox(
                            label="Model Response",
                            lines=10,
                            max_lines=20
                        )
                
                two_images_submit.click(
                    chat_with_two_images,
                    inputs=[image1_input, image2_input, two_images_text_input],
                    outputs=[two_images_output]
                )
                
                # Example prompts
                gr.Examples(
                    examples=[
                        ["What are the main differences between these two images?"],
                        ["Describe both images briefly."],
                        ["Which image is more colorful?"],
                        ["Compare the subjects in these images."]
                    ],
                    inputs=[two_images_text_input]
                )
        
        # Footer
        gr.Markdown("---")
        gr.Markdown("⚡ Powered by NVIDIA Llama Nemotron Nano VL 8B")
    
    return demo

# Create and launch the interface
if __name__ == "__main__":
    demo = create_interface()
    demo.queue()  # Enable queuing for Zero GPU
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False
    )