import gradio as gr
import os
import time
import base64
import io

from openai import OpenAI

# OpenRouter API key, read from the environment so the secret never lands in
# source control (e.g. export OPENROUTER_API_KEY="sk-or-..." before launching)
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

# Available models categorized by type
TEXT_MODELS = {
    "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
    "Claude 3 Haiku": "anthropic/claude-3-haiku",
    "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
    "Gemma": "google/gemma-3-4b-it:free",
}

# Available image models
IMAGE_MODELS = {
    "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
    "Qwen Vision": "qwen/qwen2.5-vl-32b-instruct:free",
    "Gemma Vision": "google/gemma-3-4b-it:free",
    "Llama 3 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
}

# Token usage tracking
token_usage = {
    "total_input_tokens": 0,
    "total_output_tokens": 0,
    "model_usage": {}
}

# Helper function to convert an image to base64

def image_to_base64(image):
    """Convert an image to base64 encoding"""
    # If image is a file path
    if isinstance(image, str):
        with open(image, "rb") as img_file:
            return base64.b64encode(img_file.read()).decode()
    
    # If image is already a PIL Image; JPEG cannot store an alpha channel,
    # so convert to RGB before encoding
    buffered = io.BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()
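
# Usage sketch (hypothetical file name): the returned string is typically
# embedded in a message as a data URL, e.g.
#   "data:image/jpeg;base64," + image_to_base64("photo.jpg")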

def chat_with_ai(message, model_name, history, image=None):
    """Chat with the selected OpenRouter model"""
    try:
        # Initialize OpenAI client with OpenRouter base URL
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=OPENROUTER_API_KEY,
        )
        
        # Format the conversation history for the API
        messages = []
        for human, assistant in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": assistant})
        
        # Check if we're using an image model and have an image
        if model_name in IMAGE_MODELS and image is not None:
            # Convert image to base64
            img_base64 = image_to_base64(image)
            
            # Add the current message with image
            messages.append({
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": message
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{img_base64}"
                        }
                    }
                ]
            })
        else:
            # Add the current message (text only)
            messages.append({"role": "user", "content": message})
        
        # Get the model ID based on the selected model name
        model_id = TEXT_MODELS.get(model_name) or IMAGE_MODELS.get(model_name)
        
        # Create the completion request
        start_time = time.time()
        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://gradio-openrouter-interface.com",
                "X-Title": "Gradio OpenRouter Interface",
            },
            model=model_id,
            messages=messages
        )
        end_time = time.time()
        
        # Update token usage statistics
        input_tokens = completion.usage.prompt_tokens
        output_tokens = completion.usage.completion_tokens
        
        token_usage["total_input_tokens"] += input_tokens
        token_usage["total_output_tokens"] += output_tokens
        
        if model_name not in token_usage["model_usage"]:
            token_usage["model_usage"][model_name] = {
                "input_tokens": 0,
                "output_tokens": 0,
                "requests": 0,
                "avg_response_time": 0
            }
        
        # Update model-specific usage
        model_stats = token_usage["model_usage"][model_name]
        model_stats["input_tokens"] += input_tokens
        model_stats["output_tokens"] += output_tokens
        model_stats["requests"] += 1
        
        # Calculate running average of response time
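        #   new_avg = (old_avg * (n - 1) + latest) / n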
        response_time = end_time - start_time
        model_stats["avg_response_time"] = ((model_stats["avg_response_time"] * (model_stats["requests"] - 1)) + response_time) / model_stats["requests"]
        
        # Return the model's response
        return completion.choices[0].message.content
    
    except Exception as e:
        return f"Error: {str(e)}"

# Function to generate token usage report
def generate_usage_report():
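    """Render the accumulated token_usage statistics as a Markdown report."""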
    report = f"### Token Usage Statistics\n\n"
    report += f"**Total Input Tokens:** {token_usage['total_input_tokens']}\n"
    report += f"**Total Output Tokens:** {token_usage['total_output_tokens']}\n\n"
    
    report += "### Model-Specific Usage\n\n"
    report += "| Model | Input Tokens | Output Tokens | Requests | Avg Response Time (s) |\n"
    report += "|-------|--------------|---------------|----------|----------------------|\n"
    
    for model, stats in token_usage["model_usage"].items():
        report += f"| {model} | {stats['input_tokens']} | {stats['output_tokens']} | {stats['requests']} | {stats['avg_response_time']:.2f} |\n"
    
    return report

# Create the Gradio interface
with gr.Blocks(title="OpenRouter AI Multi-Modal Interface", css="style.css") as demo:
    gr.Markdown(
        """
        # πŸ€– OpenRouter AI Multi-Modal Interface
        
        Chat with multiple AI models powered by OpenRouter API - both text-only and vision models!
        
        *Select your model type (Text or Image), choose a specific model, and start interacting!*
        
        **All responses from image models will be provided in English**
        """
    )
    
    with gr.Row():
        with gr.Column(scale=4):
            # Chat interface
            chatbot = gr.Chatbot(height=500, label="Conversation")
            
            # Image upload (initially hidden)
            image_input = gr.Image(type="pil", label="Upload Image for Analysis", visible=False)
            
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
            
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")
                
            # Instructions for image models
            image_instructions = gr.Markdown(
                """
                ### πŸ“· Image Analysis Instructions
                1. Upload an image using the panel above
                2. Ask a question about the image
                3. The AI will analyze the image and respond in English
                
                **Example prompts:**
                - "What's in this image?"
                - "Describe this scene in detail"
                - "What objects can you identify?"
                - "What's happening in this picture?"
                """,
                visible=False
            )
        
        with gr.Column(scale=1):
            # Model selection
            with gr.Group():
                model_type = gr.Radio(
                    choices=["Text Models", "Image Models"],
                    value="Text Models",
                    label="Model Type",
                    info="Choose between text-only or vision models"
                )
                
                # Function to update model dropdown based on selection
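                # Returning a dict keyed by output components is a Gradio
                # convention that updates only the listed components.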
                def update_model_choices(model_type):
                    if model_type == "Text Models":
                        return {
                            model_dropdown: gr.update(choices=list(TEXT_MODELS.keys()), value="Mistral Small"),
                            image_input: gr.update(visible=False),
                            image_instructions: gr.update(visible=False)
                        }
                    else:  # Image Models
                        return {
                            model_dropdown: gr.update(choices=list(IMAGE_MODELS.keys()), value="Kimi Vision"),
                            image_input: gr.update(visible=True),
                            image_instructions: gr.update(visible=True)
                        }
                
                model_dropdown = gr.Dropdown(
                    choices=list(TEXT_MODELS.keys()), 
                    value="Mistral Small", 
                    label="Select AI Model",
                    info="Choose from different AI models"
                )
                
                # Connect the radio button to update the dropdown and show/hide image components
                model_type.change(fn=update_model_choices, inputs=model_type, outputs=[model_dropdown, image_input, image_instructions])
                
                with gr.Tabs():
                    with gr.TabItem("Text Models"):
                        gr.Markdown(
                            """
                            ### Available Text Models
                            - **Mistral Small**: Powerful language model from Mistral AI
                            - **Claude 3 Haiku**: Anthropic's fast, lightweight model
                            - **Mistral 3.1**: Earlier version of Mistral's small model
                            - **Gemma**: Google's lightweight language model
                            """
                        )
                    
                    with gr.TabItem("Image Models"):
                        gr.Markdown(
                            """
                            ### Available Image Models
                            - **Kimi Vision**: Moonshot AI's vision-language model
                            - **Qwen Vision**: Alibaba's vision-language model
                            - **Gemma Vision**: Google's lightweight vision model
                            - **Llama 3 Vision**: Meta's vision-language model
                            
                            *Note: All responses will be provided in English*
                            """
                        )
            
            # Token usage statistics
            usage_stats = gr.Markdown("### Token Usage Statistics\n\nNo usage data yet.")
            refresh_stats_btn = gr.Button("Refresh Usage Stats")
    
    # Set up the submit action
    def respond(message, chat_history, model, image, model_type):
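        """Handle one chat turn: validate input, query the model, update the history."""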
        if not message.strip():
            return "", chat_history
        
        # Check if we need to use image
        use_image = model_type == "Image Models" and image is not None
        
        # Add a note if using image model but no image uploaded
        if model_type == "Image Models" and image is None:
            return "", chat_history + [(message, "Please upload an image first before sending your message.")]
        
        # Process with or without image
        if use_image:
            bot_message = chat_with_ai(message, model, chat_history, image)
        else:
            bot_message = chat_with_ai(message, model, chat_history)
        
        chat_history.append((message, bot_message))
        return "", chat_history
    
    # Connect the components
    submit_btn.click(
        respond, 
        [msg, chatbot, model_dropdown, image_input, model_type], 
        [msg, chatbot]
    )
    msg.submit(
        respond, 
        [msg, chatbot, model_dropdown, image_input, model_type], 
        [msg, chatbot]
    )
    
    # Clear chat and image
    def clear_all():
        return [], None
    
    clear_btn.click(clear_all, None, [chatbot, image_input], queue=False)
    
    # Update usage statistics
    refresh_stats_btn.click(generate_usage_report, None, usage_stats)

# For Hugging Face Spaces compatibility
if __name__ == "__main__":
    # Launch the interface
    demo.launch(share=True)
else:
    # When imported (e.g. on Hugging Face Spaces), launch without a public share link
    app = demo.launch(share=False, show_api=False)