import gradio as gr
from fastapi import FastAPI, Request
import uvicorn
import spaces
from sentence_transformers import SentenceTransformer

print("Loading embedding model")
Embedder = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

app = FastAPI()


@spaces.GPU
def embed(text):
    # Encode the input text on the GPU and return a plain Python list
    query_embedding = Embedder.encode(text)
    return query_embedding.tolist()


@app.post("/v1/embeddings")
async def openai_embed(req: Request):
    # OpenAI-compatible embeddings endpoint
    body = await req.json()
    print(body)
    model = body["model"]  # requested model name is read but the loaded model is always used
    text = body["input"]
    embeddings = embed(text)
    return {
        "object": "list",
        "data": [
            {
                "object": "embedding",
                "embedding": embeddings,
                "index": 0,
            }
        ],
        "model": "mixedbread-ai/mxbai-embed-large-v1",
        "usage": {"prompt_tokens": 0, "total_tokens": 0},
    }


# Minimal Gradio UI for trying the embedder interactively
with gr.Blocks() as demo:
    text = gr.Textbox()
    embeddings = gr.Textbox()
    text.submit(embed, [text], [embeddings])

print("Demo run...")
# Launch the Blocks app without blocking, mount it on the FastAPI app,
# then shut down the standalone Gradio server.
(app2, url, other) = demo.launch(
    prevent_thread_lock=True, server_name=None, server_port=8000
)
GradioApp = gr.mount_gradio_app(app, demo, path="", ssr_mode=False)
demo.close()

if __name__ == "__main__":
    print("Running uvicorn...")
    uvicorn.run(GradioApp, host="0.0.0.0", port=7860)
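
# --- Example client call (illustrative sketch, not part of the app above) ---
# Assumes the server is reachable at http://localhost:7860 (adjust to your
# Space's URL) and that the `requests` package is installed; the field names
# below mirror the OpenAI-style response returned by /v1/embeddings.
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:7860/v1/embeddings",
#       json={"model": "mixedbread-ai/mxbai-embed-large-v1", "input": "hello world"},
#   )
#   vector = resp.json()["data"][0]["embedding"]
#   print(len(vector), vector[:5])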