import gradio as gr import os from openai import OpenAI import json import time # OpenRouter API key OPENROUTER_API_KEY = "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" # Available models categorized by type TEXT_MODELS = { "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free", "Claude 3 Haiku": "anthropic/claude-3-haiku", "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free", "Gemma": "google/gemma-3-4b-it:free", } # Available image models IMAGE_MODELS = { "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free", "Qwen Vision": "qwen/qwen2.5-vl-32b-instruct:free", "Gemma Vision": "google/gemma-3-4b-it:free", "Llama 3 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free", } # Token usage tracking token_usage = { "total_input_tokens": 0, "total_output_tokens": 0, "model_usage": {} } # Initialize chat history history = [] # Helper function to convert image to base64 import base64 from PIL import Image import io def image_to_base64(image): """Convert an image to base64 encoding""" # If image is a file path if isinstance(image, str): with open(image, "rb") as img_file: return base64.b64encode(img_file.read()).decode() # If image is already a PIL Image buffered = io.BytesIO() image.save(buffered, format="JPEG") return base64.b64encode(buffered.getvalue()).decode() def chat_with_ai(message, model_name, history, image=None): """Chat with the selected OpenRouter model""" try: # Initialize OpenAI client with OpenRouter base URL client = OpenAI( base_url="https://openrouter.ai/api/v1", api_key=OPENROUTER_API_KEY, ) # Format the conversation history for the API messages = [] for human, assistant in history: messages.append({"role": "user", "content": human}) messages.append({"role": "assistant", "content": assistant}) # Check if we're using an image model and have an image if model_name in IMAGE_MODELS and image is not None: # Convert image to base64 img_base64 = image_to_base64(image) # Add the current message with image messages.append({ "role": "user", "content": [ { "type": "text", "text": message }, { "type": "image_url", "image_url": { "url": f"data:image/jpeg;base64,{img_base64}" } } ] }) else: # Add the current message (text only) messages.append({"role": "user", "content": message}) # Get the model ID based on the selected model name model_id = TEXT_MODELS.get(model_name) or IMAGE_MODELS.get(model_name) # Create the completion request start_time = time.time() completion = client.chat.completions.create( extra_headers={ "HTTP-Referer": "https://gradio-openrouter-interface.com", "X-Title": "Gradio OpenRouter Interface", }, model=model_id, messages=messages ) end_time = time.time() # Update token usage statistics input_tokens = completion.usage.prompt_tokens output_tokens = completion.usage.completion_tokens token_usage["total_input_tokens"] += input_tokens token_usage["total_output_tokens"] += output_tokens if model_name not in token_usage["model_usage"]: token_usage["model_usage"][model_name] = { "input_tokens": 0, "output_tokens": 0, "requests": 0, "avg_response_time": 0 } # Update model-specific usage model_stats = token_usage["model_usage"][model_name] model_stats["input_tokens"] += input_tokens model_stats["output_tokens"] += output_tokens model_stats["requests"] += 1 # Calculate running average of response time response_time = end_time - start_time model_stats["avg_response_time"] = ((model_stats["avg_response_time"] * (model_stats["requests"] - 1)) + response_time) / model_stats["requests"] # Return the model's response return completion.choices[0].message.content except Exception as e: return f"Error: {str(e)}" # Function to generate token usage report def generate_usage_report(): report = f"### Token Usage Statistics\n\n" report += f"**Total Input Tokens:** {token_usage['total_input_tokens']}\n" report += f"**Total Output Tokens:** {token_usage['total_output_tokens']}\n\n" report += "### Model-Specific Usage\n\n" report += "| Model | Input Tokens | Output Tokens | Requests | Avg Response Time (s) |\n" report += "|-------|--------------|---------------|----------|----------------------|\n" for model, stats in token_usage["model_usage"].items(): report += f"| {model} | {stats['input_tokens']} | {stats['output_tokens']} | {stats['requests']} | {stats['avg_response_time']:.2f} |\n" return report # Create the Gradio interface with gr.Blocks(title="OpenRouter AI Multi-Modal Interface", css="style.css") as demo: gr.Markdown( """ # 🤖 OpenRouter AI Multi-Modal Interface Chat with multiple AI models powered by OpenRouter API - both text-only and vision models! *Select your model type (Text or Image), choose a specific model, and start interacting!* **All responses from image models will be provided in English** """ ) with gr.Row(): with gr.Column(scale=4): # Chat interface chatbot = gr.Chatbot(height=500, label="Conversation") # Image upload (initially hidden) image_input = gr.Image(type="pil", label="Upload Image for Analysis", visible=False) msg = gr.Textbox(label="Your message", placeholder="Type your message here...") with gr.Row(): submit_btn = gr.Button("Send", variant="primary") clear_btn = gr.Button("Clear Chat") # Instructions for image models image_instructions = gr.Markdown( """ ### 📷 Image Analysis Instructions 1. Upload an image using the panel above 2. Ask a question about the image 3. The AI will analyze the image and respond in English **Example prompts:** - "What's in this image?" - "Describe this scene in detail" - "What objects can you identify?" - "What's happening in this picture?" """, visible=False ) with gr.Column(scale=1): # Model selection with gr.Group(): model_type = gr.Radio( choices=["Text Models", "Image Models"], value="Text Models", label="Model Type", info="Choose between text-only or vision models" ) # Function to update model dropdown based on selection def update_model_choices(model_type): if model_type == "Text Models": return { model_dropdown: gr.update(choices=list(TEXT_MODELS.keys()), value="Mistral Small"), image_input: gr.update(visible=False), image_instructions: gr.update(visible=False) } else: # Image Models return { model_dropdown: gr.update(choices=list(IMAGE_MODELS.keys()), value="Kimi Vision"), image_input: gr.update(visible=True), image_instructions: gr.update(visible=True) } model_dropdown = gr.Dropdown( choices=list(TEXT_MODELS.keys()), value="Mistral Small", label="Select AI Model", info="Choose from different AI models" ) # Connect the radio button to update the dropdown and show/hide image components model_type.change(fn=update_model_choices, inputs=model_type, outputs=[model_dropdown, image_input, image_instructions]) with gr.Tabs(): with gr.TabItem("Text Models"): gr.Markdown( """ ### Available Text Models - **Mistral Small**: Powerful language model from Mistral AI - **Mistral 3.1**: Earlier version of Mistral's model - **Gemma**: Google's lightweight language model - **Llama 3**: Meta's large language model """ ) with gr.TabItem("Image Models"): gr.Markdown( """ ### Available Image Models - **Kimi Vision**: Moonshot AI's vision-language model - **Qwen Vision**: Alibaba's vision-language model - **Gemma Vision**: Google's lightweight vision model - **Llama 3 Vision**: Meta's vision-language model *Note: All responses will be provided in English* """ ) # Token usage statistics usage_stats = gr.Markdown("### Token Usage Statistics\n\nNo usage data yet.") refresh_stats_btn = gr.Button("Refresh Usage Stats") # Set up the submit action def respond(message, chat_history, model, image, model_type): if not message.strip(): return "", chat_history # Check if we need to use image use_image = model_type == "Image Models" and image is not None # Add a note if using image model but no image uploaded if model_type == "Image Models" and image is None: return "", chat_history + [(message, "Please upload an image first before sending your message.")] # Process with or without image if use_image: bot_message = chat_with_ai(message, model, chat_history, image) else: bot_message = chat_with_ai(message, model, chat_history) chat_history.append((message, bot_message)) return "", chat_history # Connect the components submit_btn.click( respond, [msg, chatbot, model_dropdown, image_input, model_type], [msg, chatbot] ) msg.submit( respond, [msg, chatbot, model_dropdown, image_input, model_type], [msg, chatbot] ) # Clear chat and image def clear_all(): return None, None clear_btn.click(clear_all, None, [chatbot, image_input], queue=False) # Update usage statistics refresh_stats_btn.click(generate_usage_report, None, usage_stats) # For Hugging Face Spaces compatibility if __name__ == "__main__": # Launch the interface demo.launch(share=True) else: # For Hugging Face Spaces, we need to expose the app app = demo.launch(share=False, show_api=False)