File size: 847 Bytes
995b7bc
 
 
 
4355acb
995b7bc
4355acb
 
995b7bc
 
 
 
 
 
 
4355acb
995b7bc
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from fastapi import FastAPI
from transformers import AutoTokenizer, AutoModel
import torch

# Use /tmp as the cache directory for the pretrained tokenizer and model.
_CACHE_DIR = "/tmp"
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Both objects are loaded once at import time and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=_CACHE_DIR)
model = AutoModel.from_pretrained(MODEL_NAME, cache_dir=_CACHE_DIR)

app = FastAPI()

def get_embedding(text):
    """Return the mean-pooled embedding of ``text`` as plain Python floats.

    The input is tokenized (with padding and truncation enabled), run through
    the module-level ``model`` without gradient tracking, and the token
    vectors in ``last_hidden_state`` are averaged over the sequence dimension.
    The average is weighted by the attention mask so that padding positions,
    which appear when a batch contains texts of different lengths, do not
    contribute to the result.

    Args:
        text: Input handed to the module-level ``tokenizer``; a single string,
            or a list of strings for a batch.

    Returns:
        The pooled tensor after ``squeeze()`` and ``tolist()``: a flat list of
        floats for one input, or a list of such lists for several inputs.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    token_vectors = outputs.last_hidden_state
    # Broadcast the mask over the hidden dimension: 1.0 for real tokens,
    # 0.0 for padding.
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_vectors.dtype)
    summed = (token_vectors * mask).sum(dim=1)
    # Clamp guards against division by zero should a row have no real tokens.
    counts = mask.sum(dim=1).clamp(min=1e-9)
    embedding = summed / counts
    return embedding.squeeze().tolist()

@app.post("/embed")
async def embed_text(data: dict):
    """Embed the ``"text"`` field of the JSON request body.

    Args:
        data: Parsed JSON object from the request body. Its ``"text"`` entry
            is forwarded to ``get_embedding``.

    Returns:
        ``{"text": ..., "embedding": ...}`` on success, or ``{"error": ...}``
        when ``"text"`` is missing, empty, or neither a string nor a list.
    """
    text = data.get("text", "")
    if not text:
        return {"error": "No text provided"}
    # A truthy non-text value (number, object, ...) would otherwise reach the
    # tokenizer and surface as an unhandled server error; report it in the
    # same shape as the missing-text error instead.
    if not isinstance(text, (str, list)):
        return {"error": "Text must be a string or a list of strings"}
    vector = get_embedding(text)
    return {"text": text, "embedding": vector}