"""Streamlit chat app: match a free-text query to a product and recommend similar ones.

A query is embedded with SBERT and compared against every product description;
the best match is then looked up in a precomputed (Corex) similarity table to
produce recommendations.
"""

import ast
import os
import pickle
import re

import pandas as pd
import streamlit as st
import torch
from sentence_transformers import SentenceTransformer, util

# Set page config (must be the first Streamlit command executed).
st.set_page_config(page_title="NLP Product Recommender", layout="wide")


# ------------------ Load Data ------------------ #
@st.cache_data
def load_product_descriptions():
    """Return the unique (Product Code, Product Description) pairs from the workbook.

    The first row read from the sheet is promoted to the column header and
    removed from the data; rows missing either field are dropped.
    """
    df = pd.read_excel(
        "productssales.xlsx",
        sheet_name="MarginEnquiryList (75)",
        engine="openpyxl",
    )
    cleaned_df = df.iloc[1:].copy()
    cleaned_df.columns = df.iloc[0]
    product_df = (
        cleaned_df[["Product Code", "Product Description"]]
        .dropna()
        .drop_duplicates()
    )
    return product_df.reset_index(drop=True)


@st.cache_data
def load_corex_similarity():
    """Return the ``'product_similarity'`` entry stored in ``recommendations.pkl``."""
    # NOTE(security): pickle.load can execute arbitrary code. Only ship a
    # recommendations.pkl that was produced by a trusted pipeline.
    with open("recommendations.pkl", "rb") as f:
        data = pickle.load(f)
    return data["product_similarity"]


product_df = load_product_descriptions()
product_names = product_df["Product Description"].astype(str).tolist()
product_similarity_df = load_corex_similarity()

if not product_names:
    # Nothing to search against; stop before trying to embed an empty list.
    st.error("No product descriptions were found in productssales.xlsx.")
    st.stop()


# ------------------ Load SBERT ------------------ #
@st.cache_resource
def load_sbert_model():
    """Load the sentence-embedding model once per server process."""
    return SentenceTransformer("all-MiniLM-L6-v2")


@st.cache_resource
def load_product_embeddings(names):
    """Encode all product descriptions once and reuse the tensor across reruns.

    Streamlit re-executes this script on every interaction; without caching,
    the whole catalogue would be re-embedded for each chat message.
    """
    return load_sbert_model().encode(names, convert_to_tensor=True)


model = load_sbert_model()
product_embeddings = load_product_embeddings(product_names)


# ------------------ NLP Filter Extraction ------------------ #
# Word boundaries keep keywords from matching inside unrelated words
# (e.g. "hat" in "what", "red" in "required", "hose" in "those").
# An optional trailing "s" keeps plural category mentions ("hoses") working.
_CATEGORY_RE = re.compile(
    r"\b(t-shirt|shoes|pants|jacket|jeans|hoodie|sneakers|bag|hat|hose|nipple"
    r"|layflat|aerosol)s?\b",
    re.I,
)
_COLOR_RE = re.compile(
    r"\b(red|blue|green|black|white|yellow|pink|grey|orange|clear)\b",
    re.I,
)
_PRICE_RE = re.compile(r"under\s*\$?\s*(\d+(?:\.\d+)?)", re.I)


def parse_filters(query):
    """Extract simple keyword filters from a free-text query.

    Returns:
        A dict with any of the keys ``'category'`` (str, lower-case),
        ``'color'`` (str, lower-case) and ``'price'`` (float, the amount
        following "under"). Keys are omitted when nothing matches.
    """
    filters = {}

    category_match = _CATEGORY_RE.search(query)
    if category_match:
        filters["category"] = category_match.group(1).lower()

    color_match = _COLOR_RE.search(query)
    if color_match:
        filters["color"] = color_match.group(1).lower()

    price_match = _PRICE_RE.search(query)
    if price_match:
        filters["price"] = float(price_match.group(1))

    return filters


# ------------------ Semantic Search ------------------ #
def find_best_match(query, product_names, embeddings, filters=None, top_k=1):
    """Rank products by cosine similarity to ``query`` and return the best ones.

    Args:
        query: Free-text user request.
        product_names: Product descriptions, aligned with ``embeddings``.
        embeddings: Embeddings of ``product_names``.
        filters: Optional dict from ``parse_filters``. Only string-valued
            filters are applied, as case-insensitive substring checks on the
            product name; non-string values (such as the price) are ignored
            here. If no product passes, the unfiltered ranking is used.
        top_k: Number of product names to return.

    Returns:
        ``(top_matches, top_score)`` where ``top_matches`` is a list of up to
        ``top_k`` names and ``top_score`` is the similarity of the first one.
        ``([], 0.0)`` when there are no products to rank.
    """
    if not product_names:
        return [], 0.0

    query_embedding = model.encode(query, convert_to_tensor=True)
    # Convert to plain floats once so sorting does not compare 0-dim tensors.
    cosine_scores = util.pytorch_cos_sim(query_embedding, embeddings)[0].tolist()
    scored_products = sorted(
        zip(product_names, cosine_scores),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if not scored_products:
        return [], 0.0

    if filters:
        wanted = [value for value in filters.values() if isinstance(value, str)]

        def passes_filters(name):
            name_lower = name.lower()
            return all(term in name_lower for term in wanted)

        filtered = [pair for pair in scored_products if passes_filters(pair[0])]
        # Fall back to the unfiltered ranking rather than returning nothing.
        if filtered:
            scored_products = filtered

    top_matches = [name for name, _ in scored_products[:top_k]]
    top_score = float(scored_products[0][1])
    return top_matches, top_score


# ------------------ Corex Recommender ------------------ #
def recommend_products_corex(product_name, top_n=5):
    """Return the ``top_n`` products most similar to ``product_name``.

    Returns:
        A list of product labels ordered by descending similarity, or an
        error-message string when ``product_name`` is not in the similarity
        table's index (callers distinguish the two with ``isinstance``).
    """
    if product_name not in product_similarity_df.index:
        return f"❌ Product '{product_name}' not found."

    sim_scores = product_similarity_df.loc[product_name]
    if isinstance(sim_scores, pd.DataFrame):
        # Duplicate index labels make .loc return several rows; use the first.
        sim_scores = sim_scores.iloc[0]

    # Remove the non-score 'Cluster' entry and the product itself explicitly,
    # instead of assuming the product is always ranked first after sorting.
    sim_scores = sim_scores.drop(labels=["Cluster", product_name], errors="ignore")
    sim_scores = sim_scores.sort_values(ascending=False).head(top_n)
    return sim_scores.index.tolist()


# ------------------ UI ------------------ #
st.title("🧠 NLP Product Recommender (Corex Only)")
st.markdown(
    "Ask for a product like: _'I need a red PVC hose under $100'_ "
    "and get Corex-based recommendations!"
)

# Initialize chat history
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Chat input
query = st.chat_input("What product are you looking for?")

if query:
    st.session_state.chat_history.append({"role": "user", "content": query})

    filters = parse_filters(query)
    match_list, score = find_best_match(
        query, product_names, product_embeddings, filters
    )

    if not match_list:
        assistant_response = "❌ No products are available to match against."
    else:
        best_match = match_list[0]
        corex_recs = recommend_products_corex(best_match, top_n=5)

        if isinstance(corex_recs, list):
            # Explicit newlines so the bullet list renders as Markdown.
            assistant_response = (
                f"🔍 **Matched Product:** `{best_match}` (Score: {score:.2f})\n\n"
                "📦 **Recommended Products:**\n\n"
                + "\n".join(f"- {prod}" for prod in corex_recs)
            )
        else:
            # recommend_products_corex returned its "not found" message.
            assistant_response = corex_recs

    st.session_state.chat_history.append(
        {"role": "assistant", "content": assistant_response}
    )

# Display chat
for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"]):
        st.markdown(msg["content"])