from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import torch

# Load the CLIP model and processor once at module level so they are reused across calls.
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

async def get_clip_embedding(image: Image.Image) -> list[float]:
    # Preprocess the image into the tensor format CLIP expects.
    inputs = clip_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        embedding = clip_model.get_image_features(**inputs)
    # L2-normalize (keepdim keeps broadcasting correct) so embeddings can be compared with cosine similarity.
    embedding = embedding / embedding.norm(p=2, dim=-1, keepdim=True)
    return embedding.squeeze().tolist()
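
For reference, a minimal usage sketch; the image path and the asyncio entry point here are illustrative assumptions, not part of the snippet above:

import asyncio

async def main():
    # Hypothetical example image; replace with your own file path.
    image = Image.open("example.jpg")
    vector = await get_clip_embedding(image)
    print(len(vector))  # 512-dimensional embedding for the ViT-B/32 model

asyncio.run(main())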