import base64
import json
import os
from io import BytesIO
from typing import Any, Dict, List, Optional

import gradio as gr
import numpy as np
import requests
import torch
from PIL import Image
from transformers.pipelines import pipeline

# MCP imports
# NOTE(review): the official MCP Python SDK is published as the `mcp`
# package; confirm that `modelcontextprotocol` (and its `server.gradio`
# transport) actually resolves in the deployment environment.
from modelcontextprotocol.server import Server
from modelcontextprotocol.server.gradio import GradioServerTransport
from modelcontextprotocol.types import (
    CallToolRequestSchema,
    ErrorCode,
    ListToolsRequestSchema,
    McpError,
)

# Initialize the model
# NOTE: trust_remote_code=True lets the model repository run its own Python
# code at load time. Flagged for review: consider pinning a `revision`.
model = pipeline(
    "image-feature-extraction",
    model="nomic-ai/nomic-embed-vision-v1.5",
    trust_remote_code=True,
)


def _to_flat_list(result: Any) -> Optional[List[Any]]:
    """Normalise a raw pipeline output into a flat Python list.

    Args:
        result: Whatever ``model(image)`` returned: a tensor, an ndarray, a
            (possibly nested) list, or any other iterable.

    Returns:
        A flat list of values, or ``None`` when ``result`` is ``None`` or
        cannot be iterated.
    """
    if result is None:
        print("Result is None")
        return None

    if isinstance(result, torch.Tensor):
        return result.detach().cpu().numpy().flatten().tolist()

    if isinstance(result, np.ndarray):
        return result.flatten().tolist()

    if isinstance(result, list):
        # A list of tensors/arrays: only the first element is used.
        if result and isinstance(result[0], (torch.Tensor, np.ndarray)):
            return result[0].flatten().tolist()
        # Nested lists of floats (the form the Transformers feature-extraction
        # pipelines are documented to return) are flattened like the tensor
        # and ndarray branches above. Without this, the reported dimension
        # would be the length of the outermost (batch) axis.
        try:
            return np.asarray(result, dtype=np.float64).ravel().tolist()
        except (TypeError, ValueError):
            # Ragged or non-numeric content: hand the list back untouched.
            return result

    # Last resort: any other iterable.
    try:
        return list(result)
    except TypeError:
        print(f"Couldn't convert result of type {type(result)} to list")
        return None


# Function to generate embeddings from an image
def generate_embedding(image: Any) -> Optional[Dict[str, Any]]:
    """Run the module-level ``model`` on an image and return its embedding.

    Args:
        image: A ``PIL.Image.Image``, or an array accepted by
            ``Image.fromarray``. ``None`` is allowed and yields ``None``.

    Returns:
        ``{"embedding": <flat list>, "dimension": <len of that list>}``, or
        ``None`` when no image was given, the model call failed, or the model
        output could not be converted to a list.

    Raises:
        Whatever ``Image.fromarray`` raises for an input it cannot convert;
        that conversion happens outside the guarded model call.
    """
    if image is None:
        return None

    # Convert to PIL Image if needed
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)

    try:
        # Generate embedding using the transformers pipeline
        embedding_list = _to_flat_list(model(image))
    except Exception as e:
        # Best-effort boundary: callers treat None as "no embedding".
        print(f"Error generating embedding: {str(e)}")
        return None

    # Ensure we have a valid embedding list
    if embedding_list is None:
        return None

    return {
        "embedding": embedding_list,
        "dimension": len(embedding_list),
    }


# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("# Nomic Vision Embedding Model (nomic-ai/nomic-embed-vision-v1.5)")
    gr.Markdown("Upload an image to generate embeddings using the Nomic Vision model.")

    with gr.Row():
        with gr.Column():
            input_image = gr.Image(type="pil", label="Input Image")
            embed_btn = gr.Button("Generate Embedding")

        with gr.Column():
            embedding_json = gr.JSON(label="Embedding Output")
            embedding_dim = gr.Textbox(label="Embedding Dimension")

    def update_embedding(img):
        """Click handler: embed ``img`` and update both output components."""
        result = generate_embedding(img)
        if result is None:
            return {
                embedding_json: None,
                embedding_dim: "No embedding generated"
            }

        return {
            embedding_json: result,
            embedding_dim: f"Dimension: {len(result['embedding'])}"
        }

    embed_btn.click(
        fn=update_embedding,
        inputs=[input_image],
        outputs=[embedding_json, embedding_dim]
    )


# MCP Server Implementation
class NomicEmbeddingServer:
    """MCP server exposing one tool, ``embed_image``.

    The tool takes an image as a URL or as base64 data and returns the
    output of ``generate_embedding`` as JSON text.
    """

    # Upper bound, in seconds, on fetching a remote image, so that a stalled
    # host cannot hang the handler indefinitely.
    REQUEST_TIMEOUT_SECONDS = 10

    def __init__(self):
        self.server = Server(
            {
                "name": "nomic-embedding-server",
                "version": "0.1.0",
            },
            {
                "capabilities": {
                    "tools": {},
                },
            }
        )

        self.setup_tool_handlers()

        # Error handling
        self.server.onerror = lambda error: print(f"[MCP Error] {error}")

    def setup_tool_handlers(self):
        """Register the list-tools and call-tool request handlers."""
        self.server.set_request_handler(ListToolsRequestSchema, self.handle_list_tools)
        self.server.set_request_handler(CallToolRequestSchema, self.handle_call_tool)

    async def handle_list_tools(self, request):
        """Return the tool listing: the ``embed_image`` tool and its schema."""
        return {
            "tools": [
                {
                    "name": "embed_image",
                    "description": "Generate embeddings for an image using nomic-ai/nomic-embed-vision-v1.5",
                    "inputSchema": {
                        "type": "object",
                        "properties": {
                            "image_url": {
                                "type": "string",
                                "description": "URL of the image to embed",
                            },
                            "image_data": {
                                "type": "string",
                                "description": "Base64-encoded image data (alternative to image_url)",
                            },
                        },
                        "anyOf": [
                            {"required": ["image_url"]},
                            {"required": ["image_data"]},
                        ],
                    },
                }
            ]
        }

    @staticmethod
    def _error_result(message):
        """Build a tool result that carries ``message`` and is flagged as an error."""
        return {
            "content": [
                {
                    "type": "text",
                    "text": message,
                }
            ],
            "isError": True,
        }

    def _load_image(self, args):
        """Open the image described by ``args``; ``image_url`` wins over ``image_data``."""
        # Handle image from URL
        if "image_url" in args:
            # NOTE(security): the URL is caller-supplied, so this is a
            # server-side request to an arbitrary host. Restrict the allowed
            # hosts if the server is reachable by untrusted clients.
            response = requests.get(
                args["image_url"], timeout=self.REQUEST_TIMEOUT_SECONDS
            )
            # Fail on 4xx/5xx rather than feeding an error page to PIL.
            response.raise_for_status()
            return Image.open(BytesIO(response.content))

        # Handle image from base64 data
        encoded = args["image_data"]
        # Accept "data:image/png;base64,...." by dropping the header. A ':'
        # is not in the base64 alphabet, so plain payloads are unaffected.
        if encoded.startswith("data:") and "," in encoded:
            encoded = encoded.split(",", 1)[1]
        return Image.open(BytesIO(base64.b64decode(encoded)))

    async def handle_call_tool(self, request):
        """Execute the ``embed_image`` tool.

        Args:
            request: Call-tool request; ``request.params.name`` and
                ``request.params.arguments`` are read.

        Returns:
            A tool result dict. On success its text is the JSON-encoded
            embedding. If the image cannot be loaded or no embedding is
            produced, the result has ``"isError": True`` and an error message.

        Raises:
            McpError: ``MethodNotFound`` for any tool other than
                ``embed_image``; ``InvalidParams`` when neither ``image_url``
                nor ``image_data`` is supplied.
        """
        if request.params.name != "embed_image":
            raise McpError(
                ErrorCode.MethodNotFound,
                f"Unknown tool: {request.params.name}"
            )

        # Arguments may be absent altogether; treat that as "nothing supplied".
        args = request.params.arguments or {}

        # Validate before the try block so this protocol error reaches the
        # client instead of being caught by the generic handler below.
        if "image_url" not in args and "image_data" not in args:
            raise McpError(
                ErrorCode.InvalidParams,
                "Either image_url or image_data must be provided"
            )

        try:
            image = self._load_image(args)
            # Generate embedding
            result = generate_embedding(image)
        except Exception as e:
            return self._error_result(f"Error generating embedding: {str(e)}")

        # generate_embedding signals failure with None; do not report "null"
        # as a successful embedding.
        if result is None:
            return self._error_result(
                "Error generating embedding: no embedding was produced for the supplied image"
            )

        return {
            "content": [
                {
                    "type": "text",
                    "text": json.dumps(result, indent=2),
                }
            ]
        }


# Initialize and run the MCP server
embedding_server = NomicEmbeddingServer()

# Connect the MCP server to the Gradio app
transport = GradioServerTransport(demo)
embedding_server.server.connect(transport)

# Launch the Gradio app
if __name__ == "__main__":
    # For Huggingface Spaces, we need to specify the server name and port
    demo.launch(server_name="0.0.0.0", server_port=7860)