File size: 4,357 Bytes
c67b8bc
c38929e
5355f3c
3597770
5355f3c
 
 
c616ce0
5355f3c
3597770
5355f3c
 
c616ce0
c67b8bc
5355f3c
 
 
 
 
 
 
5c667be
5355f3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c616ce0
 
5355f3c
3597770
c616ce0
 
5355f3c
 
 
c616ce0
5355f3c
 
 
5c667be
5355f3c
 
 
 
5c667be
5355f3c
 
 
5c667be
5355f3c
492c2c8
5355f3c
 
 
c616ce0
5355f3c
3597770
5355f3c
3597770
5355f3c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import streamlit as st
import pandas as pd
import torch
import ast
import os
import pickle
import re

from sentence_transformers import SentenceTransformer, util

# Page-level Streamlit settings: browser-tab title and wide layout.
st.set_page_config(
    page_title="NLP Product Recommender",
    layout="wide",
)

# ------------------ Load Data ------------------ #
@st.cache_data
def load_product_descriptions():
    """Load the product code / description pairs from the sales workbook.

    Reads the "MarginEnquiryList (75)" sheet of ``productssales.xlsx``. The
    first row returned by pandas holds the real column names, so it is
    promoted to the header and removed from the data.

    Returns:
        A DataFrame with the columns 'Product Code' and 'Product Description',
        without rows containing missing values, without duplicate rows, and
        with a fresh 0-based index.
    """
    raw_sheet = pd.read_excel(
        "productssales.xlsx",
        sheet_name="MarginEnquiryList (75)",
        engine='openpyxl',
    )

    # Row 0 carries the actual header labels; everything below it is data.
    header_row = raw_sheet.iloc[0]
    body = raw_sheet.iloc[1:].copy()
    body.columns = header_row

    products = body[['Product Code', 'Product Description']]
    products = products.dropna()
    products = products.drop_duplicates()
    return products.reset_index(drop=True)

@st.cache_data
def load_corex_similarity():
    """Load the precomputed product-similarity table from ``recommendations.pkl``.

    Returns:
        The object stored under the 'product_similarity' key of the pickled
        mapping (used elsewhere in this file through ``.index`` and ``.loc``).
    """
    # NOTE: pickle executes arbitrary code on load; only ship trusted files.
    with open("recommendations.pkl", "rb") as handle:
        payload = pickle.load(handle)
    similarity = payload['product_similarity']
    return similarity

# Catalogue table and the precomputed similarity table used by the recommender.
product_df = load_product_descriptions()
product_similarity_df = load_corex_similarity()

# Plain list of description strings; this is what gets embedded and searched.
product_names = list(product_df['Product Description'].astype(str))

# ------------------ Load SBERT ------------------ #
@st.cache_resource
def load_sbert_model():
    """Build the "all-MiniLM-L6-v2" SentenceTransformer (cached by Streamlit)."""
    sbert = SentenceTransformer("all-MiniLM-L6-v2")
    return sbert

model = load_sbert_model()


@st.cache_resource
def _encode_products(names):
    """Embed the catalogue descriptions once and reuse the tensor afterwards.

    Streamlit re-executes this script on every user interaction, so encoding
    at module level would re-embed the whole catalogue for each chat message.
    Caching keys the result on ``names``, so the encoder only runs again when
    the list of descriptions changes.

    Args:
        names: List of product description strings to embed.

    Returns:
        The tensor produced by ``encode(..., convert_to_tensor=True)``, one
        row per entry of ``names``.
    """
    return load_sbert_model().encode(names, convert_to_tensor=True)


product_embeddings = _encode_products(product_names)

# ------------------ NLP Filter Extraction ------------------ #
# Compiled once at import time. Word boundaries stop keywords from matching
# inside unrelated words ("hat" in "What", "red" in "covered"); the optional
# trailing "s" still lets simple plurals such as "hoses" map to "hose".
_CATEGORY_RE = re.compile(
    r"\b(t-shirt|shoes|pants|jacket|jeans|hoodie|sneakers|bag|hat|hose|nipple|layflat|aerosol)s?\b",
    re.I,
)
_COLOR_RE = re.compile(
    r"\b(red|blue|green|black|white|yellow|pink|grey|orange|clear)\b",
    re.I,
)
# Accepts "under 100", "under $99.50" and "under $1,250".
_PRICE_RE = re.compile(
    r"\bunder\s*\$?\s*((?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)",
    re.I,
)


def parse_filters(query):
    """Extract simple structured filters from a free-text product query.

    Args:
        query: The user's request, e.g. "I need a red PVC hose under $100".
            An empty or ``None`` query yields no filters.

    Returns:
        A dict containing only the keys that were detected:
        'category' (lower-case keyword), 'color' (lower-case keyword) and
        'price' (float upper bound taken from an "under <amount>" phrase).
    """
    filters = {}
    if not query:
        return filters

    category_match = _CATEGORY_RE.search(query)
    if category_match:
        # group(1) is the bare keyword, without any plural "s".
        filters['category'] = category_match.group(1).lower()

    color_match = _COLOR_RE.search(query)
    if color_match:
        filters['color'] = color_match.group(1).lower()

    price_match = _PRICE_RE.search(query)
    if price_match:
        # Strip thousands separators before converting.
        filters['price'] = float(price_match.group(1).replace(",", ""))

    return filters

# ------------------ Semantic Search ------------------ #
def find_best_match(query, product_names, embeddings, filters=None, top_k=1):
    """Rank products by semantic similarity to ``query`` and return the best.

    Args:
        query: Free-text request to embed with the module-level ``model``.
        product_names: Product description strings, aligned with ``embeddings``.
        embeddings: Precomputed embeddings for ``product_names``.
        filters: Optional dict of parsed filters. Only string values are used:
            each must occur (as a substring) in the lower-cased product name.
            Non-string values, such as a numeric price, are ignored here.
        top_k: Number of product names to return.

    Returns:
        A tuple ``(names, score)`` where ``names`` holds up to ``top_k``
        product names in descending similarity order and ``score`` is the
        cosine similarity of the first-ranked candidate as a float.
    """
    encoded_query = model.encode(query, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(encoded_query, embeddings)[0]

    # Pair every name with its score and order from most to least similar.
    ranked = list(zip(product_names, similarities))
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    if filters:
        required_terms = [value for value in filters.values() if isinstance(value, str)]
        narrowed = []
        for pair in ranked:
            lowered = pair[0].lower()
            if all(term in lowered for term in required_terms):
                narrowed.append(pair)
        # Keep the unfiltered ranking when the filters exclude everything.
        if narrowed:
            ranked = narrowed

    best_names = [name for name, _ in ranked[:top_k]]
    best_score = float(ranked[0][1])
    return best_names, best_score

# ------------------ Corex Recommender ------------------ #
def recommend_products_corex(product_name, top_n=5):
    """Return the products most similar to ``product_name``.

    Looks up the row for ``product_name`` in the module-level
    ``product_similarity_df`` and ranks the remaining columns by score.

    Args:
        product_name: Index label of the product to find neighbours for.
        top_n: Maximum number of recommendations to return.

    Returns:
        A list of up to ``top_n`` labels in descending similarity order, or
        an error message string when ``product_name`` is not in the index.
    """
    if product_name not in product_similarity_df.index:
        return f"❌ Product '{product_name}' not found."

    sim_scores = product_similarity_df.loc[product_name]
    # Remove the non-score 'Cluster' entry and the product's own entry by
    # label. Skipping "whatever sorts first" would drop a real neighbour
    # whenever another product ties with the self-similarity score or the
    # product has no column of its own.
    sim_scores = sim_scores.drop(labels=['Cluster', product_name], errors='ignore')
    ranked = sim_scores.sort_values(ascending=False)
    return ranked.iloc[:max(top_n, 0)].index.tolist()

# ------------------ UI ------------------ #
st.title("🧠 NLP Product Recommender (Corex Only)")
st.markdown("Ask for a product like: _'I need a red PVC hose under $100'_ and get Corex-based recommendations!")

# Create the conversation log the first time this session runs the script.
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []

# Handle a newly submitted message, if any.
query = st.chat_input("What product are you looking for?")
if query:
    st.session_state.chat_history.append({"role": "user", "content": query})

    # Parse keyword filters, then pick the closest catalogue entry.
    filters = parse_filters(query)
    match_list, score = find_best_match(query, product_names, product_embeddings, filters)
    best_match = match_list[0]

    corex_recs = recommend_products_corex(best_match, top_n=5)
    if not isinstance(corex_recs, list):
        # The recommender returned its "not found" message; show it as-is.
        assistant_response = corex_recs
    else:
        bullet_lines = "\n".join(f"- {prod}" for prod in corex_recs)
        # Two spaces before each newline are Markdown hard line breaks.
        assistant_response = (
            f"🔍 **Matched Product:** `{best_match}` (Score: {score:.2f})  \n"
            "📦 **Recommended Products:**  \n"
            + bullet_lines
        )

    st.session_state.chat_history.append({"role": "assistant", "content": assistant_response})

# Replay the whole conversation, oldest message first.
for msg in st.session_state.chat_history:
    role, content = msg["role"], msg["content"]
    with st.chat_message(role):
        st.markdown(content)