from PIL import Image
import io
from transformers import CLIPProcessor, CLIPModel
import torch

# Load the CLIP model and processor once at startup
model_name = "openai/clip-vit-base-patch32"
loaded_model = CLIPModel.from_pretrained(model_name)
loaded_processor = CLIPProcessor.from_pretrained(model_name)
def getTextEmbedding(text):
    # Preprocess the text into token ids and an attention mask
    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True)
    # Forward pass through the model without tracking gradients
    with torch.no_grad():
        text_features = loaded_model.get_text_features(
            input_ids=inputs_text.input_ids,
            attention_mask=inputs_text.attention_mask,
        )
    # Convert the tensor to a numpy array
    text_embedding = text_features.squeeze().numpy()
    return text_embedding
def getImageEmbedding(binary_image_data):
    # Load the raw bytes and preprocess the image into pixel values
    image = Image.open(io.BytesIO(binary_image_data))
    inputs = loaded_processor(images=image, return_tensors="pt", padding=True)
    # Forward pass through the model without tracking gradients
    with torch.no_grad():
        image_features = loaded_model.get_image_features(pixel_values=inputs.pixel_values)
    # Convert the tensor to a numpy array
    image_embedding = image_features.squeeze().numpy()
    return image_embedding
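
# A minimal end-to-end sketch, assuming a local file "example.jpg" exists
# (the filename is hypothetical). It embeds a caption and an image with the
# two functions above, then scores them with cosine similarity; CLIP is
# trained so that matching text/image pairs score higher than mismatched ones.
if __name__ == "__main__":
    import numpy as np

    with open("example.jpg", "rb") as f:  # hypothetical sample image
        img_vec = getImageEmbedding(f.read())
    txt_vec = getTextEmbedding("a photo of a cat")

    # Cosine similarity: normalize both embeddings, then take the dot product
    cos_sim = np.dot(img_vec, txt_vec) / (np.linalg.norm(img_vec) * np.linalg.norm(txt_vec))
    print(f"cosine similarity: {cos_sim:.4f}")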