import gradio as gr
import os
import time
import base64
import io

from openai import OpenAI

# OpenRouter API key, read from the environment so the secret never lands in
# source control (e.g. export OPENROUTER_API_KEY="sk-or-..." before launching)
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

# Available models categorized by type
TEXT_MODELS = {
    "Mistral Small": "mistralai/mistral-small-3.2-24b-instruct:free",
    "Claude 3 Haiku": "anthropic/claude-3-haiku",
    "Mistral 3.1": "mistralai/mistral-small-3.1-24b-instruct:free",
    "Gemma": "google/gemma-3-4b-it:free",
}

# Available image models
IMAGE_MODELS = {
    "Kimi Vision": "moonshotai/kimi-vl-a3b-thinking:free",
    "Qwen Vision": "qwen/qwen2.5-vl-32b-instruct:free",
    "Gemma Vision": "google/gemma-3-4b-it:free",
    "Llama 3 Vision": "meta-llama/llama-3.2-11b-vision-instruct:free",
}

# Token usage tracking
token_usage = {
    "total_input_tokens": 0,
    "total_output_tokens": 0,
    "model_usage": {}
}

# Helper function to convert an image to base64

def image_to_base64(image):
    """Convert an image to base64 encoding"""
    # If image is a file path
    if isinstance(image, str):
        with open(image, "rb") as img_file:
            return base64.b64encode(img_file.read()).decode()
    
    # If image is already a PIL Image; JPEG cannot store an alpha channel,
    # so convert to RGB before encoding
    buffered = io.BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode()
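
# Usage sketch (hypothetical file name): the returned string is typically
# embedded in a message as a data URL, e.g.
#   "data:image/jpeg;base64," + image_to_base64("photo.jpg")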

def chat_with_ai(message, model_name, history, image=None):
    """Chat with the selected OpenRouter model"""
    try:
        # Initialize OpenAI client with OpenRouter base URL
        client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=OPENROUTER_API_KEY,
        )
        
        # Format the conversation history for the API
        messages = []
        for human, assistant in history:
            messages.append({"role": "user", "content": human})
            messages.append({"role": "assistant", "content": assistant})
        
        # Check if we're using an image model and have an image
        if model_name in IMAGE_MODELS and image is not None:
            # Convert image to base64
            img_base64 = image_to_base64(image)
            
            # Add the current message with image
            messages.append({
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": message
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{img_base64}"
                        }
                    }
                ]
            })
        else:
            # Add the current message (text only)
            messages.append({"role": "user", "content": message})
        
        # Get the model ID based on the selected model name
        model_id = TEXT_MODELS.get(model_name) or IMAGE_MODELS.get(model_name)
        
        # Create the completion request
        start_time = time.time()
        completion = client.chat.completions.create(
            extra_headers={
                "HTTP-Referer": "https://gradio-openrouter-interface.com",
                "X-Title": "Gradio OpenRouter Interface",
            },
            model=model_id,
            messages=messages
        )
        end_time = time.time()
        
        # Update token usage statistics
        input_tokens = completion.usage.prompt_tokens
        output_tokens = completion.usage.completion_tokens
        
        token_usage["total_input_tokens"] += input_tokens
        token_usage["total_output_tokens"] += output_tokens
        
        if model_name not in token_usage["model_usage"]:
            token_usage["model_usage"][model_name] = {
                "input_tokens": 0,
                "output_tokens": 0,
                "requests": 0,
                "avg_response_time": 0
            }
        
        # Update model-specific usage
        model_stats = token_usage["model_usage"][model_name]
        model_stats["input_tokens"] += input_tokens
        model_stats["output_tokens"] += output_tokens
        model_stats["requests"] += 1
        
        # Calculate running average of response time
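        #   new_avg = (old_avg * (n - 1) + latest) / n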
        response_time = end_time - start_time
        model_stats["avg_response_time"] = ((model_stats["avg_response_time"] * (model_stats["requests"] - 1)) + response_time) / model_stats["requests"]
        
        # Return the model's response
        return completion.choices[0].message.content
    
    except Exception as e:
        return f"Error: {str(e)}"

# Function to generate token usage report
def generate_usage_report():
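    """Render the accumulated token_usage statistics as a Markdown report."""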
    report = f"### Token Usage Statistics\n\n"
    report += f"**Total Input Tokens:** {token_usage['total_input_tokens']}\n"
    report += f"**Total Output Tokens:** {token_usage['total_output_tokens']}\n\n"
    
    report += "### Model-Specific Usage\n\n"
    report += "| Model | Input Tokens | Output Tokens | Requests | Avg Response Time (s) |\n"
    report += "|-------|--------------|---------------|----------|----------------------|\n"
    
    for model, stats in token_usage["model_usage"].items():
        report += f"| {model} | {stats['input_tokens']} | {stats['output_tokens']} | {stats['requests']} | {stats['avg_response_time']:.2f} |\n"
    
    return report

# Create the Gradio interface
with gr.Blocks(title="OpenRouter AI Multi-Modal Interface", css="style.css") as demo:
    gr.Markdown(
        """
        # πŸ€– OpenRouter AI Multi-Modal Interface
        
        Chat with multiple AI models powered by OpenRouter API - both text-only and vision models!
        
        *Select your model type (Text or Image), choose a specific model, and start interacting!*
        
        **All responses from image models will be provided in English**
        """
    )
    
    with gr.Row():
        with gr.Column(scale=4):
            # Chat interface
            chatbot = gr.Chatbot(height=500, label="Conversation")
            
            # Image upload (initially hidden)
            image_input = gr.Image(type="pil", label="Upload Image for Analysis", visible=False)
            
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
            
            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary")
                clear_btn = gr.Button("Clear Chat")
                
            # Instructions for image models
            image_instructions = gr.Markdown(
                """
                ### πŸ“· Image Analysis Instructions
                1. Upload an image using the panel above
                2. Ask a question about the image
                3. The AI will analyze the image and respond in English
                
                **Example prompts:**
                - "What's in this image?"
                - "Describe this scene in detail"
                - "What objects can you identify?"
                - "What's happening in this picture?"
                """,
                visible=False
            )
        
        with gr.Column(scale=1):
            # Model selection
            with gr.Group():
                model_type = gr.Radio(
                    choices=["Text Models", "Image Models"],
                    value="Text Models",
                    label="Model Type",
                    info="Choose between text-only or vision models"
                )
                
                # Function to update model dropdown based on selection
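                # Returning a dict keyed by output components is a Gradio
                # convention that updates only the listed components.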
                def update_model_choices(model_type):
                    if model_type == "Text Models":
                        return {
                            model_dropdown: gr.update(choices=list(TEXT_MODELS.keys()), value="Mistral Small"),
                            image_input: gr.update(visible=False),
                            image_instructions: gr.update(visible=False)
                        }
                    else:  # Image Models
                        return {
                            model_dropdown: gr.update(choices=list(IMAGE_MODELS.keys()), value="Kimi Vision"),
                            image_input: gr.update(visible=True),
                            image_instructions: gr.update(visible=True)
                        }
                
                model_dropdown = gr.Dropdown(
                    choices=list(TEXT_MODELS.keys()), 
                    value="Mistral Small", 
                    label="Select AI Model",
                    info="Choose from different AI models"
                )
                
                # Connect the radio button to update the dropdown and show/hide image components
                model_type.change(fn=update_model_choices, inputs=model_type, outputs=[model_dropdown, image_input, image_instructions])
                
                with gr.Tabs():
                    with gr.TabItem("Text Models"):
                        gr.Markdown(
                            """
                            ### Available Text Models
                            - **Mistral Small**: Powerful language model from Mistral AI
                            - **Claude 3 Haiku**: Anthropic's fast, lightweight model
                            - **Mistral 3.1**: Earlier version of Mistral's small model
                            - **Gemma**: Google's lightweight language model
                            """
                        )
                    
                    with gr.TabItem("Image Models"):
                        gr.Markdown(
                            """
                            ### Available Image Models
                            - **Kimi Vision**: Moonshot AI's vision-language model
                            - **Qwen Vision**: Alibaba's vision-language model
                            - **Gemma Vision**: Google's lightweight vision model
                            - **Llama 3 Vision**: Meta's vision-language model
                            
                            *Note: All responses will be provided in English*
                            """
                        )
            
            # Token usage statistics
            usage_stats = gr.Markdown("### Token Usage Statistics\n\nNo usage data yet.")
            refresh_stats_btn = gr.Button("Refresh Usage Stats")
    
    # Set up the submit action
    def respond(message, chat_history, model, image, model_type):
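        """Handle one chat turn: validate input, query the model, update the history."""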
        if not message.strip():
            return "", chat_history
        
        # Check if we need to use image
        use_image = model_type == "Image Models" and image is not None
        
        # Add a note if using image model but no image uploaded
        if model_type == "Image Models" and image is None:
            return "", chat_history + [(message, "Please upload an image first before sending your message.")]
        
        # Process with or without image
        if use_image:
            bot_message = chat_with_ai(message, model, chat_history, image)
        else:
            bot_message = chat_with_ai(message, model, chat_history)
        
        chat_history.append((message, bot_message))
        return "", chat_history
    
    # Connect the components
    submit_btn.click(
        respond, 
        [msg, chatbot, model_dropdown, image_input, model_type], 
        [msg, chatbot]
    )
    msg.submit(
        respond, 
        [msg, chatbot, model_dropdown, image_input, model_type], 
        [msg, chatbot]
    )
    
    # Clear chat and image
    def clear_all():
        return [], None
    
    clear_btn.click(clear_all, None, [chatbot, image_input], queue=False)
    
    # Update usage statistics
    refresh_stats_btn.click(generate_usage_report, None, usage_stats)

# For Hugging Face Spaces compatibility
if __name__ == "__main__":
    # Launch the interface
    demo.launch(share=True)
else:
    # When imported (e.g. on Hugging Face Spaces), launch without a public share link
    app = demo.launch(share=False, show_api=False)