# IAMTFRMZA's picture
# Update app.py
# 5355f3c verified
import streamlit as st
import pandas as pd
import torch
import ast
import os
import pickle
import re
from sentence_transformers import SentenceTransformer, util
# Page setup: browser-tab title plus the wide layout.
st.set_page_config(
    layout="wide",
    page_title="NLP Product Recommender",
)
# ------------------ Load Data ------------------ #
@st.cache_data
def load_product_descriptions():
    """Load the distinct product code/description pairs from the workbook.

    Reads the "MarginEnquiryList (75)" sheet of ``productssales.xlsx`` with
    the openpyxl engine. The first row returned by pandas is promoted to the
    column header and the rows after it are treated as the records.

    Returns:
        A DataFrame holding only the 'Product Code' and 'Product Description'
        columns, with rows containing missing values and duplicate rows
        removed, re-indexed from 0.
    """
    raw = pd.read_excel(
        "productssales.xlsx",
        sheet_name="MarginEnquiryList (75)",
        engine='openpyxl',
    )
    # The real header sits in the first data row of the sheet as read.
    header_row = raw.iloc[0]
    records = raw.iloc[1:].copy()
    records.columns = header_row

    wanted_columns = ['Product Code', 'Product Description']
    pairs = records[wanted_columns].dropna()
    pairs = pairs.drop_duplicates()
    return pairs.reset_index(drop=True)
@st.cache_data
def load_corex_similarity():
    """Return the 'product_similarity' entry stored in ``recommendations.pkl``.

    The file is unpickled and indexed with the key 'product_similarity'; the
    other contents of the pickle are not used here.
    """
    # NOTE(review): unpickling executes code embedded in the file, so
    # recommendations.pkl must only ever come from a trusted source.
    with open("recommendations.pkl", "rb") as pickle_file:
        payload = pickle.load(pickle_file)
    return payload['product_similarity']
# Module-level data: the product table, its descriptions as plain strings
# (the texts used for semantic matching), and the similarity table.
product_df = load_product_descriptions()
product_names = list(product_df['Product Description'].astype(str))
product_similarity_df = load_corex_similarity()
# ------------------ Load SBERT ------------------ #
@st.cache_resource
def load_sbert_model():
    """Build the "all-MiniLM-L6-v2" SentenceTransformer used for matching."""
    sbert = SentenceTransformer("all-MiniLM-L6-v2")
    return sbert
@st.cache_resource
def _encode_product_names(names):
    """Embed the product descriptions once and reuse the result on reruns.

    Streamlit re-executes the script from the top on each user interaction,
    so encoding the whole catalogue at module level repeats that work for
    every chat message. Caching keys the result on ``names``; the encoding is
    only recomputed when the list of descriptions changes.

    Args:
        names: List of product description strings to embed.

    Returns:
        The value of ``encode(names, convert_to_tensor=True)`` from the
        cached sentence-transformer model.
    """
    return load_sbert_model().encode(names, convert_to_tensor=True)


model = load_sbert_model()
# The embeddings are treated as read-only everywhere in this script, so the
# shared cached object can be used directly.
product_embeddings = _encode_product_names(product_names)
# ------------------ NLP Filter Extraction ------------------ #
# Patterns are compiled once at import time. The letter look-arounds stop a
# keyword from matching inside a longer word ("hat" in "What", "red" in
# "required") while still allowing digits or punctuation right next to it.
_CATEGORY_PATTERN = re.compile(
    r"(?<![A-Za-z])"
    r"(t-shirt|shoes|pants|jacket|jeans|hoodie|sneakers|bag|hat|hose|nipple|layflat|aerosol)"
    r"s?(?![A-Za-z])",
    re.I,
)
_COLOR_PATTERN = re.compile(
    r"(?<![A-Za-z])"
    r"(red|blue|green|black|white|yellow|pink|grey|orange|clear)"
    r"(?![A-Za-z])",
    re.I,
)
# Accepts "under 100", "under $100", "under $ 99.50".
_PRICE_PATTERN = re.compile(r"(?<![A-Za-z])under\s*\$?\s*(\d+(?:\.\d+)?)", re.I)


def parse_filters(query):
    """Extract simple category / colour / price filters from a free-text query.

    Args:
        query: The user's request, e.g. "I need a red PVC hose under $100".
            An empty or ``None`` query yields no filters.

    Returns:
        A dict containing any of the following keys that were detected:
        ``'category'`` (lower-cased keyword, a trailing plural "s" is
        tolerated), ``'color'`` (lower-cased keyword) and ``'price'`` (float
        upper bound taken from an "under <amount>" phrase). Only the first
        occurrence of each kind is used.
    """
    filters = {}
    if not query:
        return filters

    category_match = _CATEGORY_PATTERN.search(query)
    if category_match:
        # group(1) is the bare keyword without an optional plural "s".
        filters['category'] = category_match.group(1).lower()

    color_match = _COLOR_PATTERN.search(query)
    if color_match:
        filters['color'] = color_match.group(1).lower()

    price_match = _PRICE_PATTERN.search(query)
    if price_match:
        filters['price'] = float(price_match.group(1))

    return filters
# ------------------ Semantic Search ------------------ #
def find_best_match(query, product_names, embeddings, filters=None, top_k=1):
    """Rank products by cosine similarity to the query and return the best.

    The query is encoded with the module-level ``model`` and compared against
    ``embeddings``; ``product_names`` and ``embeddings`` are paired up
    positionally.

    Args:
        query: Free-text search string.
        product_names: Product description strings.
        embeddings: Embeddings of ``product_names``.
        filters: Optional dict of filter values. Only string values are
            applied, each as a lower-case substring test on the product
            name; non-string values are ignored. If no product satisfies
            all of them, the unfiltered ranking is used instead.
        top_k: Number of product names to return.

    Returns:
        A tuple ``(names, score)``: the ``top_k`` best names and the
        similarity score of the best one as a float.
    """
    query_vector = model.encode(query, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(query_vector, embeddings)[0]

    ranked = list(zip(product_names, similarities))
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    if filters:
        required_terms = [term for term in filters.values() if isinstance(term, str)]
        narrowed = []
        for pair in ranked:
            lowered_name = pair[0].lower()
            if all(term in lowered_name for term in required_terms):
                narrowed.append(pair)
        # Keep the unfiltered ranking when the filters exclude everything.
        if narrowed:
            ranked = narrowed

    best_names = [name for name, _ in ranked[:top_k]]
    best_score = float(ranked[0][1])
    return best_names, best_score
# ------------------ Corex Recommender ------------------ #
def recommend_products_corex(product_name, top_n=5, similarity_df=None):
    """Return the products most similar to ``product_name``.

    Args:
        product_name: Row label to look up in the similarity table.
        top_n: Maximum number of recommendations to return; values below 1
            give an empty list.
        similarity_df: Optional similarity table (rows and columns labelled
            by product, optionally with a 'Cluster' column). Defaults to the
            module-level ``product_similarity_df``.

    Returns:
        A list of up to ``top_n`` product labels ordered by descending
        similarity, or an error message string when ``product_name`` is not
        in the table's index.
    """
    table = product_similarity_df if similarity_df is None else similarity_df
    if product_name not in table.index:
        return f"❌ Product '{product_name}' not found."

    scores = table.loc[product_name].drop('Cluster', errors='ignore')
    # Remove the product's own entry by label. Skipping the first sorted
    # position is only correct when the product happens to sort first, which
    # tied scores or a table without a self column do not guarantee.
    scores = scores.drop(product_name, errors='ignore')
    # A stable sort keeps tied products in table order, so the output is
    # deterministic.
    ranked = scores.sort_values(ascending=False, kind='stable')
    return ranked.head(max(top_n, 0)).index.tolist()
# ------------------ UI ------------------ #
st.title("🧠 NLP Product Recommender (Corex Only)")
st.markdown("Ask for a product like: _'I need a red PVC hose under $100'_ and get Corex-based recommendations!")

# Initialize chat history (kept in session state so it survives reruns)
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Chat input
query = st.chat_input("What product are you looking for?")
if query:
    st.session_state.chat_history.append({"role": "user", "content": query})

    # Resolve the free-text query to the closest catalogue entry, then ask
    # the Corex similarity table for related products.
    filters = parse_filters(query)
    match_list, score = find_best_match(query, product_names, product_embeddings, filters)
    best_match = match_list[0]
    corex_recs = recommend_products_corex(best_match, top_n=5)

    if isinstance(corex_recs, list):
        # Blank lines separate the Markdown blocks so the two headings are
        # not merged into a single paragraph.
        response_lines = [
            f"🔍 **Matched Product:** `{best_match}` (Score: {score:.2f})",
            "",
            "📦 **Recommended Products:**",
            "",
        ]
        response_lines.extend(f"- {prod}" for prod in corex_recs)
        assistant_response = "\n".join(response_lines)
    else:
        # A non-list result is the recommender's "not found" message.
        assistant_response = corex_recs

    st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})

# Display chat
for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])