# Streamlit app: NLP product recommender (SBERT semantic search + Corex similarity).
import streamlit as st | |
import pandas as pd | |
import torch | |
import ast | |
import os | |
import pickle | |
import re | |
from sentence_transformers import SentenceTransformer, util | |
# Set page config
# Wide layout gives the chat transcript and recommendation lists more room.
st.set_page_config(page_title="NLP Product Recommender", layout="wide")
# ------------------ Load Data ------------------ # | |
def load_product_descriptions():
    """Read the sales workbook and return a deduplicated code/description table.

    The sheet's real header lives in its first data row, so that row is
    promoted to column names and removed from the data before selecting
    the two columns of interest.
    """
    raw = pd.read_excel("productssales.xlsx", sheet_name="MarginEnquiryList (75)", engine='openpyxl')
    body = raw.iloc[1:].copy()
    body.columns = raw.iloc[0]
    products = body[['Product Code', 'Product Description']].dropna().drop_duplicates()
    return products.reset_index(drop=True)
def load_corex_similarity():
    """Unpickle the precomputed Corex data and return its product-similarity matrix."""
    with open("recommendations.pkl", "rb") as handle:
        payload = pickle.load(handle)
    return payload['product_similarity']
# Module-level data used by the search and recommender functions below.
product_df = load_product_descriptions()
# Descriptions double as the searchable corpus and the similarity-matrix keys.
product_names = product_df['Product Description'].astype(str).tolist()
product_similarity_df = load_corex_similarity()
# ------------------ Load SBERT ------------------ # | |
def load_sbert_model():
    """Instantiate the SBERT encoder used for query and product embeddings."""
    sbert = SentenceTransformer("all-MiniLM-L6-v2")
    return sbert
model = load_sbert_model()
# Pre-encode every product description once so each query only embeds the query text.
product_embeddings = model.encode(product_names, convert_to_tensor=True)
# ------------------ NLP Filter Extraction ------------------ # | |
def parse_filters(query):
    """Extract structured filters (category, color, max price) from a free-text query.

    Returns a dict with any of the keys 'category', 'color' (lowercased
    strings) and 'price' (float ceiling parsed from "under $N"); keys are
    absent when the query does not mention them.
    """
    keyword_patterns = {
        "category": r"(t-shirt|shoes|pants|jacket|jeans|hoodie|sneakers|bag|hat|hose|nipple|layflat|aerosol)",
        "color": r"(red|blue|green|black|white|yellow|pink|grey|orange|clear)",
    }
    extracted = {}
    for key, pattern in keyword_patterns.items():
        hit = re.search(pattern, query, re.I)
        if hit:
            extracted[key] = hit.group(0).lower()
    # "under $100" / "under 100" style price cap.
    price_hit = re.search(r"under\s*\$?(\d+)", query, re.I)
    if price_hit:
        extracted['price'] = float(price_hit.group(1))
    return extracted
# ------------------ Semantic Search ------------------ # | |
def find_best_match(query, product_names, embeddings, filters=None, top_k=1):
    """Semantic search: return (top-k matching product names, best cosine score).

    Parameters:
        query: free-text user query, embedded with the module-level SBERT model.
        product_names: list of candidate product description strings.
        embeddings: pre-computed tensor of embeddings aligned with product_names.
        filters: optional dict from parse_filters(). Only string-valued
            filters (category/color) are applied, as case-insensitive
            substring checks on the product name; the numeric 'price'
            filter is ignored here because no price data is available.
        top_k: number of names to return.

    Returns ([], 0.0) when product_names is empty (previously raised
    IndexError). When the filters eliminate every candidate, the
    unfiltered ranking is used as a fallback.
    """
    if not product_names:
        return [], 0.0
    query_embedding = model.encode(query, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(query_embedding, embeddings)[0]
    scored_products = sorted(zip(product_names, cosine_scores), key=lambda x: x[1], reverse=True)
    if filters:
        # Hoist the string filters once instead of re-testing types per product.
        str_filters = [f for f in filters.values() if isinstance(f, str)]

        def passes_filters(name):
            name_lower = name.lower()
            return all(f in name_lower for f in str_filters)

        filtered = [p for p in scored_products if passes_filters(p[0])]
        if filtered:
            scored_products = filtered
    top_matches = [p[0] for p in scored_products[:top_k]]
    top_score = float(scored_products[0][1])
    return top_matches, top_score
# ------------------ Corex Recommender ------------------ # | |
def recommend_products_corex(product_name, top_n=5):
    """Return up to top_n products most similar to product_name per the Corex matrix.

    Returns a list of product names on success, or an error string when the
    product is not present in the similarity matrix (callers branch on type).
    """
    if product_name not in product_similarity_df.index:
        return f"β Product '{product_name}' not found."
    sim_scores = product_similarity_df.loc[product_name].drop('Cluster', errors='ignore')
    # Exclude the queried product itself by label rather than assuming it
    # always sorts first: the old `[1:top_n+1]` slice dropped a genuine
    # recommendation (and could return the query product) whenever a tie or
    # imperfect self-similarity pushed the product off the top spot.
    sim_scores = sim_scores.drop(product_name, errors='ignore')
    return sim_scores.sort_values(ascending=False).head(top_n).index.tolist()
# ------------------ UI ------------------ #
st.title("π§ NLP Product Recommender (Corex Only)")
st.markdown("Ask for a product like: _'I need a red PVC hose under $100'_ and get Corex-based recommendations!")

# Initialize chat history (session_state persists across Streamlit reruns).
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Chat input
query = st.chat_input("What product are you looking for?")
if query:
    st.session_state.chat_history.append({"role": "user", "content": query})
    filters = parse_filters(query)
    match_list, score = find_best_match(query, product_names, product_embeddings, filters)
    if match_list:
        best_match = match_list[0]
        corex_recs = recommend_products_corex(best_match, top_n=5)
        # recommend_products_corex returns a list on success and an error
        # string when the matched product is missing from the matrix.
        if isinstance(corex_recs, list):
            assistant_response = f"""π **Matched Product:** `{best_match}` (Score: {score:.2f})
π¦ **Recommended Products:**
""" + "\n".join([f"- {prod}" for prod in corex_recs])
        else:
            assistant_response = corex_recs
    else:
        # Guard: nothing to match against (empty product catalog).
        assistant_response = "No matching products were found."
    st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})

# Display chat
for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])