from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
import torch

# Load model & tokenizer once at startup so every request reuses the same weights
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

app = FastAPI()

# Request body format
class TextInput(BaseModel):
    text: str

def get_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pooling: average the token embeddings into a single sentence vector
    embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings[0].tolist()

@app.post("/embed")
def embed_text(data: TextInput):
    embedding = get_embedding(data.text)
    return {"embedding": embedding}
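
# --- Usage sketch (illustrative, not part of the service) ---
# Assuming this file is saved as main.py (the filename is an assumption),
# the API can be served with: uvicorn main:app --reload
# The quick in-process check below uses FastAPI's TestClient, so no running
# server is needed; all-MiniLM-L6-v2 returns 384-dimensional embeddings, so
# the printed length should be 384.
if __name__ == "__main__":
    from fastapi.testclient import TestClient

    client = TestClient(app)
    response = client.post(
        "/embed",
        json={"text": "FastAPI makes serving embeddings easy."},
    )
    print(len(response.json()["embedding"]))  # expected: 384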