|
import asyncio

import numpy as np
from pymongo import MongoClient

import config
|
|
|
|
# Module-level MongoDB handles, created once at import time.
# The connection URI, database name and collection name are all read from
# the project's `config` module.
client = MongoClient(config.MongoURI)
db = client.get_database(config.MONGO_DB)
collection = db.get_collection(config.MONGO_COLLECTION)
|
|
|
|
|
|
def cosine_similarity(query_embedding, embeddings):
    """Return the cosine similarity between a query vector and each row of ``embeddings``.

    Args:
        query_embedding: 1-D sequence of numbers (the query vector).
        embeddings: 2-D sequence or array holding one candidate vector per
            row, each the same length as ``query_embedding``. May be empty.

    Returns:
        list[float]: One score per row of ``embeddings``, in row order.
        A pair whose norm product is zero (zero-norm row or zero-norm query)
        scores ``0.0`` rather than ``nan``; an empty ``embeddings`` yields
        ``[]``.
    """
    query = np.asarray(query_embedding)
    matrix = np.asarray(embeddings)

    # An empty list converts to a 1-D array of shape (0,), for which the
    # row-wise norm below (axis=1) would raise. There is nothing to score.
    if matrix.ndim == 1 and matrix.size == 0:
        return []

    denominators = np.linalg.norm(matrix, axis=1) * np.linalg.norm(query)
    dot_products = matrix @ query

    # Divide only where the denominator is non-zero; the remaining slots keep
    # their initial 0.0 so zero vectors never produce nan/inf or warnings.
    similarities = np.zeros(denominators.shape, dtype=float)
    np.divide(dot_products, denominators, out=similarities, where=denominators != 0)

    return similarities.tolist()
|
|
|
|
async def get_similar_products(query_embedding):
    """Rank every stored product by cosine similarity to ``query_embedding``.

    Loads all documents from the module-level ``collection`` (projected to
    the fields used below), scores each one against the query with
    ``cosine_similarity`` and returns them best match first.

    Args:
        query_embedding: Sequence of numbers representing the query vector.

    Returns:
        dict with two keys:

        ``similar_products``
            List of dicts with ``id`` (stringified ``_id``), ``similarity``
            (rounded to 2 decimals), ``name``, ``category``, ``image_url``,
            ``tags``, ``target_audience`` and ``brand``, sorted by
            descending similarity.
        ``filter_brand``
            Brands of the products whose rounded similarity is at least 0.7,
            stripped and lower-cased when they are strings; empty values and
            ``"unknown"`` are left out. Order is unspecified.
    """
    projection = {
        "_id": 1,
        "embedding": 1,
        "name": 1,
        "category": 1,
        "image_url": 1,
        "tags": 1,
        "target_audience": 1,
        "brand": 1,
    }
    brand_threshold = 0.7

    def _load_products():
        # Materialise the cursor inside the worker thread so that all of the
        # blocking network I/O happens off the event loop.
        return list(collection.find({}, projection))

    # `collection.find` is a blocking call; running it directly in this
    # coroutine would stall every other task for the whole collection scan.
    loop = asyncio.get_running_loop()
    products = await loop.run_in_executor(None, _load_products)

    # Documents without a usable embedding cannot be scored; skip them
    # instead of failing the whole request with a KeyError.
    products = [p for p in products if p.get("embedding")]
    if not products:
        return {"similar_products": [], "filter_brand": []}

    embeddings = np.array([p["embedding"] for p in products])
    similarities = cosine_similarity(query_embedding, embeddings)

    results = []
    brand_set = set()

    for product, score in zip(products, similarities):
        # A non-finite score (e.g. from a zero-norm vector) is treated as
        # "no similarity" so it can neither break the sort order below nor
        # leak nan/inf into the response.
        score_float = round(float(score), 2) if np.isfinite(score) else 0.0

        results.append({
            "id": str(product["_id"]),
            "similarity": score_float,
            "name": product.get("name", "Unknown"),
            "category": product.get("category", "Unknown"),
            "image_url": product.get("image_url", ""),
            "tags": product.get("tags", []),
            "target_audience": product.get("target_audience", "unisex"),
            "brand": product.get("brand", "unknown"),
        })

        # Only sufficiently similar products contribute to the brand filter.
        if score_float >= brand_threshold:
            brand = product.get("brand", "")
            if isinstance(brand, str):
                brand = brand.strip().lower()
            if brand and brand != "unknown":
                brand_set.add(brand)

    results.sort(key=lambda item: item["similarity"], reverse=True)

    return {
        "similar_products": results,
        "filter_brand": list(brand_set),
    }
|
|
|