import os
import json
import re
import torch
import cloudinary
import cloudinary.uploader
from pymongo import MongoClient
from dotenv import load_dotenv
from transformers import CLIPProcessor, CLIPModel
from google import genai
from PIL import Image
# Load credentials and connection settings from a local .env file.
load_dotenv()
# Environment variables (all expected in .env; os.getenv returns None if a key is missing,
# which would surface later as an auth/connection failure rather than here).
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
MONGO_URI = os.getenv("MONGO_URI")
MONGO_DB = os.getenv("MONGO_DB")
MONGO_COLLECTION = os.getenv("MONGO_COLLECTION")
CLOUD_NAME = os.getenv("CLOUDINARY_CLOUD_NAME")
API_KEY = os.getenv("CLOUDINARY_API_KEY")
API_SECRET = os.getenv("CLOUDINARY_API_SECRET")
# Configure the Cloudinary SDK globally; upload_image_to_cloudinary relies on this.
cloudinary.config(
    cloud_name=CLOUD_NAME,
    api_key=API_KEY,
    api_secret=API_SECRET
)
# MongoDB handle for the target collection (connection is lazy until first use).
mongo_client = MongoClient(MONGO_URI)
collection = mongo_client[MONGO_DB][MONGO_COLLECTION]
# Load the CLIP model + processor once at import time (downloads weights on first run).
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Constants: local source folder and the destination folder name on Cloudinary.
IMAGE_FOLDER = "images"
CLOUDINARY_FOLDER = "Image visualizer images"
# NOTE(review): duplicates the GEMINI_API_KEY read above — one of the two could be reused.
api_key = os.getenv("GEMINI_API_KEY")
# Initialize the Gemini client used by extract_metadata_with_gemini.
client = genai.Client(api_key=api_key)
def extract_metadata_with_gemini(image_path):
    """Extract product metadata from an image via the Gemini API.

    Sends the image plus a JSON-only prompt to Gemini and parses the
    returned JSON. On any failure (API error, unreadable image,
    unparseable response) returns a placeholder record so the batch
    pipeline can continue.

    Args:
        image_path: Path to the product image file.

    Returns:
        dict with keys: name, category, tags, color, brand,
        target_audience, price.
    """
    try:
        image = Image.open(image_path).convert("RGB")
        prompt = """
You are an AI assistant for an e-commerce visual search engine.
Given an image of a product, return only a JSON object with the following fields:
- name
- category (e.g. bag, footwear, dress, electronics)
- tags (5 words describing material, gender, style, color, etc.)
- color (as a color name, not code)
- brand (if visible, else "unknown")
- target_audience ("Men", "Women", "kids", "Unisex")
- price (if you can find it, else "unknown")
Return ONLY valid JSON. No markdown, no explanation.
"""
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[image, prompt],
        )
        # Gemini sometimes wraps the JSON in markdown fences or prose;
        # grab the outermost {...} span instead of parsing response.text directly.
        match = re.search(r"\{.*\}", response.text, re.DOTALL)
        if match:
            return json.loads(match.group(0))
        raise ValueError("No JSON object found in Gemini response.")
    except Exception as e:
        # Deliberate best-effort: log and fall back so one bad image
        # doesn't abort the whole ingestion run.
        print(f"[❌] Gemini API error for {image_path}: {e}")
        return {
            # splitext (not split('.')[0]) keeps filenames containing dots
            # intact, e.g. "red.bag.v2.jpg" -> "red.bag.v2".
            "name": os.path.splitext(os.path.basename(image_path))[0],
            "category": "unknown",
            "tags": [],
            "color": "unknown",
            "brand": "unknown",
            "target_audience": "unknown",
            "price": "unknown",
        }
def upload_image_to_cloudinary(image_path):
    """Upload a local image to the configured Cloudinary folder.

    Reuses the original filename (overwriting any previous upload with
    the same name) and returns the HTTPS URL of the stored asset.
    """
    upload_options = {
        "folder": CLOUDINARY_FOLDER,
        "use_filename": True,
        "unique_filename": False,
        "overwrite": True,
    }
    response = cloudinary.uploader.upload(image_path, **upload_options)
    return response["secure_url"]
def get_clip_embedding(image_path):
    """Compute an L2-normalized CLIP image embedding.

    Opens the image, runs it through the module-level CLIP processor and
    model, normalizes the feature vector, and returns it as a plain
    Python list (JSON/BSON-serializable for MongoDB).
    """
    pil_image = Image.open(image_path).convert("RGB")
    model_inputs = clip_processor(images=pil_image, return_tensors="pt")
    # Inference only — no gradients needed.
    with torch.no_grad():
        features = clip_model.get_image_features(**model_inputs)
    normalized = features / features.norm(p=2, dim=-1)
    return normalized.squeeze().tolist()
def process_all_images():
    """Ingest every image in IMAGE_FOLDER into MongoDB.

    For each .jpg/.jpeg/.png file: extract metadata with Gemini, upload
    the file to Cloudinary, compute a CLIP embedding, and insert one
    document combining all three into the configured collection.
    A failure on a single image is logged and skipped so the rest of
    the batch still runs.
    """
    for file in os.listdir(IMAGE_FOLDER):
        if not file.lower().endswith((".jpg", ".jpeg", ".png")):
            continue
        image_path = os.path.join(IMAGE_FOLDER, file)
        print(f"\n📸 Processing {file}...")
        try:
            # Step 1: Gemini metadata (best-effort; falls back to placeholders).
            meta = extract_metadata_with_gemini(image_path)
            print(f" 🧠 Metadata: {meta}")
            # Step 2: Upload to Cloudinary and get the public URL.
            image_url = upload_image_to_cloudinary(image_path)
            print(f" ☁️ Uploaded to Cloudinary: {image_url}")
            # Step 3: CLIP embedding for visual similarity search.
            embedding = get_clip_embedding(image_path)
            print(" 🔗 CLIP Embedding extracted.")
            # Step 4: Combine and store in MongoDB. Use .get with defaults:
            # Gemini's JSON is not guaranteed to contain every key (the
            # guaranteed-complete dict only comes from the error fallback).
            document = {
                "name": meta.get("name", "unknown"),
                "category": meta.get("category", "unknown"),
                "tags": meta.get("tags", []),
                "brand": meta.get("brand", "unknown"),
                "color": meta.get("color", "unknown"),
                "price": meta.get("price", "unknown"),
                "target_audience": meta.get("target_audience", "unknown"),
                "image_url": image_url,
                "embedding": embedding,
            }
            collection.insert_one(document)
            print(" ✅ Stored in MongoDB.")
        except Exception as e:
            # Skip the failing image (e.g. upload or embedding error)
            # rather than aborting the whole batch.
            print(f"[❌] Failed to process {file}: {e}")
# Script entry point: run the full ingestion pipeline over IMAGE_FOLDER.
if __name__ == "__main__":
    process_all_images()