Spaces:
Sleeping
Sleeping
File size: 4,357 Bytes
c67b8bc c38929e 5355f3c 3597770 5355f3c c616ce0 5355f3c 3597770 5355f3c c616ce0 c67b8bc 5355f3c 5c667be 5355f3c c616ce0 5355f3c 3597770 c616ce0 5355f3c c616ce0 5355f3c 5c667be 5355f3c 5c667be 5355f3c 5c667be 5355f3c 492c2c8 5355f3c c616ce0 5355f3c 3597770 5355f3c 3597770 5355f3c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import streamlit as st
import pandas as pd
import torch
import ast
import os
import pickle
import re
from sentence_transformers import SentenceTransformer, util
# Configure the browser tab title and use the full-width page layout.
st.set_page_config(
    page_title="NLP Product Recommender",
    layout="wide",
)
# ------------------ Load Data ------------------ #
@st.cache_data
def load_product_descriptions():
    """Load the de-duplicated product code/description table from the workbook.

    The first data row of the sheet is used as the column header and the
    remaining rows as records. Rows missing either 'Product Code' or
    'Product Description' are discarded, as are exact duplicate pairs.

    Returns:
        A DataFrame with the 'Product Code' and 'Product Description'
        columns and a fresh 0-based index.
    """
    raw_sheet = pd.read_excel(
        "productssales.xlsx",
        sheet_name="MarginEnquiryList (75)",
        engine='openpyxl',
    )
    # Row 0 of the parsed sheet holds the real column names.
    header_row = raw_sheet.iloc[0]
    records = raw_sheet.iloc[1:].copy()
    records.columns = header_row
    code_and_description = records[['Product Code', 'Product Description']]
    unique_products = code_and_description.dropna().drop_duplicates()
    return unique_products.reset_index(drop=True)
@st.cache_data
def load_corex_similarity():
    """Return the 'product_similarity' entry stored in recommendations.pkl.

    NOTE(review): the pickle is unpickled without validation, so the file
    must come from a trusted source.
    """
    with open("recommendations.pkl", "rb") as pickle_file:
        payload = pickle.load(pickle_file)
    similarity = payload['product_similarity']
    return similarity
# Catalogue table and the description strings fed to the sentence encoder.
product_df = load_product_descriptions()
product_names = list(product_df['Product Description'].astype(str))
# Similarity table consulted by the Corex recommender.
product_similarity_df = load_corex_similarity()
# ------------------ Load SBERT ------------------ #
@st.cache_resource
def load_sbert_model():
    """Build the "all-MiniLM-L6-v2" sentence encoder (cached by Streamlit)."""
    encoder = SentenceTransformer("all-MiniLM-L6-v2")
    return encoder
model = load_sbert_model()


@st.cache_resource
def _encode_products(names):
    """Encode the product descriptions into a tensor of sentence embeddings.

    Streamlit re-executes the whole script on every interaction, so encoding
    at module level would re-embed the full catalogue for each chat message.
    Caching keeps one embedding tensor per distinct list of names.

    Args:
        names: list of product description strings.

    Returns:
        The tensor produced by ``model.encode(..., convert_to_tensor=True)``.
    """
    return model.encode(names, convert_to_tensor=True)


product_embeddings = _encode_products(product_names)
# ------------------ NLP Filter Extraction ------------------ #
# Patterns are anchored on word boundaries so that keywords embedded in other
# words ("hat" in "what", "red" in "hundred") are not mistaken for filters.
# A trailing plural "s" is tolerated for categories ("hoses" -> "hose").
_CATEGORY_RE = re.compile(
    r"\b(t-shirt|shoes|pants|jacket|jeans|hoodie|sneakers|bag|hat|hose|nipple|layflat|aerosol)s?\b",
    re.I,
)
_COLOR_RE = re.compile(
    r"\b(red|blue|green|black|white|yellow|pink|grey|orange|clear)\b",
    re.I,
)
# Accepts "under 100", "under $99.50" and "under $1,250" (strict 3-digit
# thousands groups, so "under 3,4 or 5" still reads as 3).
_PRICE_RE = re.compile(
    r"\bunder\s*\$?\s*((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)",
    re.I,
)


def parse_filters(query):
    """Extract simple structured filters from a free-text product query.

    Args:
        query: the user's request, e.g. "I need a red PVC hose under $100".

    Returns:
        A dict containing any of the following keys that were detected:
        'category' (lower-case keyword, singular form as listed in the
        pattern), 'color' (lower-case keyword) and 'price' (float upper
        bound taken from an "under <amount>" phrase). Keys for filters that
        were not found are absent; an empty dict means nothing matched.
    """
    filters = {}

    category_match = _CATEGORY_RE.search(query)
    if category_match:
        # group(1) excludes the optional plural suffix.
        filters['category'] = category_match.group(1).lower()

    color_match = _COLOR_RE.search(query)
    if color_match:
        filters['color'] = color_match.group(1).lower()

    price_match = _PRICE_RE.search(query)
    if price_match:
        filters['price'] = float(price_match.group(1).replace(",", ""))

    return filters
# ------------------ Semantic Search ------------------ #
def find_best_match(query, product_names, embeddings, filters=None, top_k=1):
    """Rank products by semantic similarity to ``query``.

    Args:
        query: free-text search string.
        product_names: product description strings, aligned with
            ``embeddings``.
        embeddings: embeddings of ``product_names`` as accepted by
            ``util.pytorch_cos_sim``.
        filters: optional dict as produced by ``parse_filters``. Only string
            values are applied (each must occur as a substring of the
            lower-cased product name); non-string values such as 'price'
            are ignored here.
        top_k: number of product names to return.

    Returns:
        ``(top_matches, top_score)`` where ``top_matches`` holds up to
        ``top_k`` names in descending similarity and ``top_score`` is the
        cosine similarity of the first one. If the filters exclude every
        product, the unfiltered ranking is used instead. For an empty
        catalogue the result is ``([], 0.0)``.
    """
    # Nothing to rank: return early instead of failing on scored[0] below.
    if len(product_names) == 0:
        return [], 0.0

    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]

    # Convert to plain floats once so sorting does not compare 0-d tensors.
    scored_products = sorted(
        zip(product_names, cosine_scores.tolist()),
        key=lambda pair: pair[1],
        reverse=True,
    )

    if filters:
        required_terms = [f for f in filters.values() if isinstance(f, str)]
        filtered = [
            pair for pair in scored_products
            if all(term in pair[0].lower() for term in required_terms)
        ]
        # Fall back to the unfiltered ranking when nothing survives.
        if filtered:
            scored_products = filtered

    top_matches = [name for name, _ in scored_products[:top_k]]
    top_score = float(scored_products[0][1])
    return top_matches, top_score
# ------------------ Corex Recommender ------------------ #
def recommend_products_corex(product_name, top_n=5):
    """Return the products most similar to ``product_name``.

    Looks up the row for ``product_name`` in ``product_similarity_df`` and
    ranks the remaining columns by score.

    Args:
        product_name: label to look up in the similarity table's index.
        top_n: maximum number of recommendations to return.

    Returns:
        A list of up to ``top_n`` column labels in descending score order,
        or an error message string when ``product_name`` is not in the
        index (callers distinguish the two with ``isinstance(..., list)``).
    """
    if product_name not in product_similarity_df.index:
        return f"❌ Product '{product_name}' not found."

    # Drop the 'Cluster' column and the product's own column by label.
    # Slicing off "the first sorted entry" is unreliable: with ties or a
    # non-maximal self-score it drops a real recommendation and can return
    # the queried product itself.
    # NOTE(review): assumes the columns are labelled with product names.
    sim_scores = product_similarity_df.loc[product_name].drop(
        ['Cluster', product_name], errors='ignore'
    )
    ranked = sim_scores.sort_values(ascending=False)
    return ranked.head(top_n).index.tolist()
# ------------------ UI ------------------ #
st.title("🧠 NLP Product Recommender (Corex Only)")
st.markdown("Ask for a product like: _'I need a red PVC hose under $100'_ and get Corex-based recommendations!")

# Keep the transcript in session state so it is still there on the next rerun.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Handle a newly submitted message, if any.
query = st.chat_input("What product are you looking for?")
if query:
    st.session_state.chat_history.append({"role": "user", "content": query})

    # Resolve the free-text request to the closest catalogue entry.
    filters = parse_filters(query)
    match_list, score = find_best_match(query, product_names, product_embeddings, filters)
    best_match = match_list[0]

    corex_recs = recommend_products_corex(best_match, top_n=5)
    if not isinstance(corex_recs, list):
        # The recommender returned its "not found" message; show it as-is.
        assistant_response = corex_recs
    else:
        bullet_lines = "\n".join(f"- {prod}" for prod in corex_recs)
        assistant_response = (
            f"🔍 **Matched Product:** `{best_match}` (Score: {score:.2f})\n"
            "📦 **Recommended Products:**\n"
        ) + bullet_lines

    st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})

# Render the whole conversation, oldest message first.
for entry in st.session_state.chat_history:
    with st.chat_message(entry["role"]):
        st.markdown(entry["content"])
|