"""OpenAI-style embeddings proxy in front of Hugging Face Spaces.

Exposes ``POST /v1/embeddings`` on a FastAPI app. Each request is forwarded
through ``gradio_client`` to the Space named in the request's ``model`` field,
and the Space's answer is wrapped in an OpenAI-like embeddings response.
A small Gradio UI, mounted on the same app, generates the matching
``CREATE EXTERNAL MODEL`` T-SQL statement.
"""
import json
import os

# Third-party imports are kept in their original relative order; some of them
# are not referenced in this file but are left in place in case they are
# needed for their import-time side effects.
import gradio as gr
from fastapi import FastAPI, HTTPException, Request
from fastapi.concurrency import run_in_threadpool
import uvicorn
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim
from sentence_transformers.quantization import quantize_embeddings
import spaces
from gradio_client import Client

app = FastAPI()


def _extract_bearer_token(header):
    """Return the credential part of an ``Authorization`` header value.

    Args:
        header: Raw header value, or ``None`` when the header is absent.

    Returns:
        The text after the first space (``"Bearer abc"`` -> ``"abc"``).
        A value without a space is treated as a raw token, except a bare
        ``"Bearer"`` scheme, which carries no token. ``None`` when no token
        can be extracted.
    """
    if not header:
        return None
    scheme, _, credentials = header.strip().partition(" ")
    credentials = credentials.strip()
    if credentials:
        return credentials
    # No "<scheme> <token>" shape: avoid the IndexError the naive split raised.
    if scheme.lower() == "bearer":
        return None
    return scheme or None


def _fetch_embeddings(space_path, hf_token, api_name, text):
    """Call the Space endpoint synchronously and return the decoded result.

    This function blocks on network I/O, so the async route runs it in a
    worker thread instead of calling it directly.
    """
    print("Creating client...")
    space_client = Client(space_path, hf_token=hf_token)
    result = space_client.predict(text=text, api_name=api_name)
    # The endpoint is expected to return a JSON string; a value that is
    # already decoded is passed through instead of failing in json.loads.
    if isinstance(result, (str, bytes, bytearray)):
        return json.loads(result)
    return result


@app.post("/v1/embeddings")
async def openai_embeddings(request: Request):
    """Proxy an OpenAI-style embeddings request to a Hugging Face Space.

    Request body (JSON object):
        model: Space path passed to ``gradio_client.Client`` (required).
        input: Value forwarded as the ``text`` argument (required).
        ApiName: Optional endpoint name on the Space; defaults to ``/embed``.

    The ``Authorization`` header, when present, supplies the Hugging Face
    token used to create the client.

    Returns:
        A dict shaped like an OpenAI embeddings response, with zeroed usage.

    Raises:
        HTTPException: 400 when the body is not a JSON object or a required
            field is missing.
    """
    try:
        body = await request.json()
    except ValueError as err:
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON."
        ) from err
    if not isinstance(body, dict):
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object."
        )
    print(body)

    missing = [key for key in ("model", "input") if key not in body]
    if missing:
        raise HTTPException(
            status_code=400,
            detail=f"Missing required field(s): {', '.join(missing)}",
        )

    BearerToken = _extract_bearer_token(request.headers.get("authorization"))
    if BearerToken is not None:
        print("Using token...")

    SpacePath = body['model']
    apiName = body.get("ApiName") or "/embed"
    text = body['input']

    # Client creation and predict() are blocking calls; keep them off the
    # event loop so other requests are still served meanwhile.
    embeddings = await run_in_threadpool(
        _fetch_embeddings, SpacePath, BearerToken, apiName, text
    )

    # NOTE(review): the OpenAI API labels each data item "embedding"
    # (singular); "embeddings" is kept as-is because existing consumers
    # already work with it - confirm before changing.
    return {
        'object': "list",
        'data': [{
            'object': "embeddings",
            'embedding': embeddings,
            'index': 0,
        }],
        'model': SpacePath,
        'usage': {
            'prompt_tokens': 0,
            'total_tokens': 0,
        },
    }


# Public host name used in the generated T-SQL; falls back to localhost when
# the SPACE_HOST environment variable is unset or empty.
SpaceHost = os.environ.get("SPACE_HOST") or "localhost"

with gr.Blocks() as demo:
    gr.Markdown(f"""
This space allow you connect SQL Server 2025 with Hugging Face to generate embeddings!
First, create a ZeroGPU Space that export an endpoint called embed.
That endpoint must accept a parameter called text.
Then, create the external model using T-SQL:

```sql
CREATE EXTERNAL MODEL HuggingFace
WITH (
    LOCATION = 'https://{SpaceHost}/v1/embeddings',
    API_FORMAT = 'OpenAI',
    MODEL_TYPE = EMBEDDINGS,
    MODEL = 'user/space'
);
```

If you prefer, just type the space name into field bellow and we generate the right T-SQL command for you!
""")

    SpaceName = gr.Textbox(label="Space", submit_btn=True)
    # Shown in the UI but not used by the generated statement below.
    EndpointName = gr.Textbox(value="/embed", label="EndpointName")
    tsqlCommand = gr.Textbox(lines=5)

    def UpdateTsql(space):
        """Return the CREATE EXTERNAL MODEL statement for the given space."""
        # Double any single quote so the name stays inside the T-SQL literal.
        space = space.replace("'", "''")
        return f"""
CREATE EXTERNAL MODEL HuggingFace
WITH (
    LOCATION = 'https://{SpaceHost}/v1/embeddings',
    API_FORMAT = 'OpenAI',
    MODEL_TYPE = EMBEDDINGS,
    MODEL = '{space}'
)
"""

    SpaceName.submit(UpdateTsql, [SpaceName], [tsqlCommand])

## hack to make this work with ZeroGPU in this same space
#print("Demo run...");
#(app2,url,other) = demo.launch(prevent_thread_lock=True, server_name=None, server_port=8000);
# demo.close

print("Mounting app...")
GradioApp = gr.mount_gradio_app(app, demo, path="", ssr_mode=False)

if __name__ == '__main__':
    print("Running uviconr...")
    uvicorn.run(GradioApp, host="0.0.0.0", port=7860)