from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import torch

# Load the CLIP model and its processor once at import time so they are
# reused across calls instead of being re-downloaded per request.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")


async def get_clip_embedding(image: Image.Image):
    # Preprocess the PIL image into the tensor format CLIP expects.
    inputs = clip_processor(images=image, return_tensors="pt")
    # Inference only: disable gradient tracking to save memory and compute.
    with torch.no_grad():
        embedding = clip_model.get_image_features(**inputs)
    # L2-normalize so cosine similarity reduces to a plain dot product.
    embedding = embedding / embedding.norm(p=2, dim=-1, keepdim=True)
    return embedding.squeeze().tolist()
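A minimal usage sketch, assuming the function is awaited from asyncio-aware code; the path "example.jpg" is a placeholder, and the 512 in the comment reflects the projection size of clip-vit-base-patch32:

import asyncio
from PIL import Image

async def main():
    # Placeholder image path; convert to RGB since CLIP expects 3 channels.
    image = Image.open("example.jpg").convert("RGB")
    vector = await get_clip_embedding(image)
    print(len(vector))  # 512-dimensional embedding for this checkpoint

asyncio.run(main())

Note that the function body is CPU/GPU-bound and contains no awaits, so in an async web server it may be worth offloading the call to a thread or worker pool to avoid blocking the event loop.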