PD03 committed on
Commit
98d76ab
·
verified ·
1 Parent(s): bd8d56b

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +3 -3
  2. app.py +127 -102
  3. requirements.txt +2 -1
README.md CHANGED
@@ -27,10 +27,10 @@ streamlit run app.py
27
 
28
  ## Deploy to Hugging Face Spaces
29
  1. Create a new Space → **SDK: Streamlit**.
30
- 2. Upload `app.py` and `requirements.txt` (and this README if you like).
31
  3. The Space will build automatically and launch the app.
32
 
33
  ## Notes
34
  - Data is **synthetic** but embeds realistic pricing, discounting, cost, and elasticity signals.
35
- - SHAP is computed on a sample for responsiveness.
36
- - Recommendations are illustrative; in production, add policy bounds, portfolio constraints, and cost/promo feasibility tables.
 
27
 
28
  ## Deploy to Hugging Face Spaces
29
  1. Create a new Space → **SDK: Streamlit**.
30
+ 2. Upload `app.py`, `requirements.txt`, and this `README.md`.
31
  3. The Space will build automatically and launch the app.
32
 
33
  ## Notes
34
  - Data is **synthetic** but embeds realistic pricing, discounting, cost, and elasticity signals.
35
+ - SHAP is computed on first load and then **on demand** (via the refresh button), with a sample-size control for performance.
36
+ - Recommendations are illustrative; in production, add policy bounds, portfolio constraints, and cost/promo feasibility tables.
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import streamlit as st
2
  import numpy as np
3
  import pandas as pd
@@ -20,7 +21,7 @@ st.set_page_config(page_title="AI-Driven Daily Gross Margin", layout="wide")
20
  # 1) Synthetic data generation
21
  # -----------------------------
22
  @st.cache_data(show_spinner=False)
23
- def generate_synthetic_data(days=90, seed=42, rows_per_day=1200):
24
  rng = np.random.default_rng(seed)
25
  start_date = datetime.today().date() - timedelta(days=days)
26
  dates = pd.date_range(start_date, periods=days, freq="D")
@@ -82,7 +83,6 @@ def generate_synthetic_data(days=90, seed=42, rows_per_day=1200):
82
  # Quantity via elasticity around a reference price
83
  eps = np.array([seg_epsilon[(pp, rr, cc)] for pp, rr, cc in zip(prod, reg, ch)])
84
  ref_price = np.array([base_price[x] for x in prod])
85
- # expected qty relative to ref price
86
  qty_mu = np.exp(eps * (net_price - ref_price) / np.maximum(ref_price, 1e-6))
87
  qty = np.maximum(1, rng.poisson(8 * dow_mult * macro * qty_mu))
88
 
@@ -115,11 +115,9 @@ def generate_synthetic_data(days=90, seed=42, rows_per_day=1200):
115
  # 2) Modeling utilities
116
  # -----------------------------
117
  def build_features(df: pd.DataFrame):
118
- # Basic feature set
119
  feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
120
  feats_cat = ["product", "region", "channel"]
121
 
122
- # Rolling features for elasticity/momentum (7D trailing by segment)
123
  df = df.sort_values("date").copy()
124
  seg = ["product", "region", "channel"]
125
  df["price_per_unit"] = df["net_price"]
@@ -134,7 +132,7 @@ def build_features(df: pd.DataFrame):
134
  return df, feats_num, feats_cat, target
135
 
136
  @st.cache_resource(show_spinner=False)
137
- def train_model(df: pd.DataFrame, feats_num, feats_cat, target):
138
  X = df[feats_num + feats_cat]
139
  y = df[target]
140
 
@@ -144,7 +142,7 @@ def train_model(df: pd.DataFrame, feats_num, feats_cat, target):
144
  ("num", "passthrough", feats_num),
145
  ]
146
  )
147
- model = RandomForestRegressor(n_estimators=400, max_depth=None, random_state=42, n_jobs=-1, min_samples_leaf=2)
148
  pipe = Pipeline([("pre", pre), ("rf", model)])
149
 
150
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)
@@ -154,38 +152,43 @@ def train_model(df: pd.DataFrame, feats_num, feats_cat, target):
154
  r2 = r2_score(y_test, pred)
155
  mae = mean_absolute_error(y_test, pred)
156
 
157
- # SHAP on a sample for speed
 
 
 
 
 
158
  preproc = pipe.named_steps["pre"]
159
  rf = pipe.named_steps["rf"]
160
  feature_names = list(preproc.named_transformers_["cat"].get_feature_names_out(feats_cat)) + feats_num
161
 
162
- # transform a reasonable sample size
163
- sample_idx = np.random.choice(len(X_test), size=min(2500, len(X_test)), replace=False)
164
- X_test_t = preproc.transform(X_test.iloc[sample_idx])
 
 
 
 
 
 
 
165
  explainer = shap.TreeExplainer(rf)
166
- shap_values = explainer.shap_values(X_test_t)
167
  expected_value = explainer.expected_value
168
 
169
  shap_df = pd.DataFrame(shap_values, columns=feature_names)
170
- # also keep the corresponding original rows for joins
171
- X_test_sample = X_test.iloc[sample_idx].reset_index(drop=True)
172
-
173
- return pipe, {"r2": r2, "mae": mae}, feature_names, shap_df, expected_value, X_test_sample
174
 
175
  def estimate_segment_elasticity(df: pd.DataFrame, product, region, channel):
176
  seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
177
- # Require minimal spread
178
  if len(seg_df) < 100 or seg_df["net_price"].std() < 1e-6 or seg_df["qty"].std() < 1e-6:
179
  return -0.5, False
180
  x = np.log(np.clip(seg_df["net_price"].values, 1e-6, None)).reshape(-1,1)
181
  y = np.log(np.clip(seg_df["qty"].values, 1e-6, None))
182
  lin = LinearRegression().fit(x, y)
183
- # slope is elasticity (d ln Q / d ln P)
184
  return float(lin.coef_[0]), True
185
 
186
  def simulate_action(segment_df: pd.DataFrame, elasticity, delta_discount=0.0, delta_unit_cost=0.0):
187
- # Simulate changing discount (affects price) and unit cost
188
- # Use last-day averages as baseline
189
  if segment_df.empty:
190
  return None
191
  base = segment_df.iloc[-1]
@@ -193,12 +196,11 @@ def simulate_action(segment_df: pd.DataFrame, elasticity, delta_discount=0.0, de
193
  c0 = base["unit_cost"]
194
  q0 = base["qty"]
195
  d0 = base["discount_pct"]
196
- # Apply action
197
  new_discount = np.clip(d0 + delta_discount, 0.0, 0.45)
198
  p1 = max(0.01, base["list_price"] * (1 - new_discount))
199
  c1 = max(0.01, c0 + delta_unit_cost)
200
 
201
- # Volume change via elasticity around p0
202
  if p0 <= 0:
203
  q1 = q0
204
  else:
@@ -229,13 +231,23 @@ st.caption("Synthetic demo: Revenue − COGS focus • Driver analysis with SHAP
229
 
230
  with st.sidebar:
231
  st.header("⚙️ Controls")
232
- days = st.slider("History (days)", 45, 180, 90, 1)
233
- rows_per_day = st.slider("Rows per day", 300, 3000, 1200, 100)
 
 
 
 
 
 
234
  seed = st.number_input("Random seed", value=42, step=1)
235
  st.markdown("---")
236
  st.markdown("**Training**")
 
237
  st.caption("Model: RandomForestRegressor (SHAP via TreeExplainer)")
238
  st.markdown("---")
 
 
 
239
  st.markdown("**What-if Defaults**")
240
  default_disc_step = st.slider("Default discount step (points)", -5.0, 5.0, -1.5, 0.1)
241
  default_cost_step = st.slider("Default unit cost change", -5.0, 5.0, 0.0, 0.1)
@@ -265,50 +277,66 @@ fig.update_yaxes(tickformat=".1%")
265
  st.plotly_chart(fig, use_container_width=True)
266
 
267
  # Train
268
- with st.spinner("Training model & computing SHAP…"):
269
- pipe, metrics, feature_names, shap_df, expected_value, X_test_sample = train_model(df_feat, feats_num, feats_cat, target)
270
 
271
  st.success(f"Model trained • R²={metrics['r2']:.3f} • MAE={metrics['mae']:.4f} (GM% points)")
272
 
273
- # Global driver importance
274
  st.subheader("🔍 Driver Analysis (Global)")
275
- mean_abs = shap_df.abs().mean().sort_values(ascending=False)
276
- imp_df = pd.DataFrame({"feature": mean_abs.index, "mean_abs_shap": mean_abs.values})
277
- st.dataframe(imp_df.head(15), use_container_width=True)
278
-
279
- # Optional: SHAP bar plot
280
- fig2, ax = plt.subplots(figsize=(8,5))
281
- imp_df.head(20).iloc[::-1].plot(kind="barh", x="feature", y="mean_abs_shap", ax=ax)
282
- ax.set_title("Top Drivers — Mean |SHAP| (GM%)")
283
- ax.set_xlabel("Mean |SHAP| contribution")
284
- st.pyplot(fig2, clear_figure=True)
285
-
286
- # Segment analysis using net SHAP sign for key features
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  st.subheader("🧭 Where did it happen? (Segment view)")
288
- # We'll focus on key interpretable features
289
- key_feats = [f for f in feature_names if any(k in f for k in ["discount", "price_per_unit", "cost_per_unit","unit_cost","net_price"])]
290
- joined = pd.concat([X_test_sample.reset_index(drop=True), shap_df[key_feats].reset_index(drop=True)], axis=1)
291
- # net directional impact
292
- grp = joined.groupby(["product","region","channel"]).mean(numeric_only=True)
293
- # Summaries
294
- rank_cols = [c for c in grp.columns if c not in ["product","region","channel"]]
295
- top_bad = grp[rank_cols].sum(axis=1).sort_values().head(10)
296
- top_good = grp[rank_cols].sum(axis=1).sort_values(ascending=False).head(10)
297
-
298
- c1, c2 = st.columns(2)
299
- with c1:
300
- st.caption("Segments dragging GM% (more negative net SHAP)")
301
- st.write(top_bad.to_frame("net_shap_sum").round(4))
302
- with c2:
303
- st.caption("Segments lifting GM% (more positive net SHAP)")
304
- st.write(top_good.to_frame("net_shap_sum").round(4))
305
 
306
  # -----------------------------
307
  # What-if Simulator
308
  # -----------------------------
309
- st.header("🧪 Whatif Simulator & Recommendations")
310
 
311
- # Segment picker based on last-day data
312
  last_day = df["date"].max()
313
  seg_today = df[df["date"]==last_day][["product","region","channel"]].drop_duplicates().sort_values(["product","region","channel"])
314
  seg_choice = st.selectbox("Choose a segment (product × region × channel):",
@@ -322,9 +350,9 @@ st.caption(f"Estimated price elasticity for segment: **{elasticity:.2f}** ({'ok'
322
 
323
  c3, c4 = st.columns(2)
324
  with c3:
325
- delta_disc = st.slider("Change discount (percentage points)", -10.0, 10.0, default_disc_step, 0.1)
326
  with c4:
327
- delta_cost = st.slider("Change unit cost (absolute)", -5.0, 5.0, default_cost_step, 0.1)
328
 
329
  sim_res = simulate_action(seg_hist, elasticity, delta_discount=delta_disc/100.0, delta_unit_cost=delta_cost)
330
 
@@ -343,48 +371,45 @@ if sim_res is not None:
343
  # Auto Recommendations
344
  # -----------------------------
345
  st.subheader("💡 Top Recommendations (ranked by expected uplift)")
346
-
347
- # Identify candidate segments from last day (using negative discount/cost SHAP in recent window)
348
- recent_join = joined.copy()
349
- recent_join["key"] = recent_join["product"] + "|" + recent_join["region"] + "|" + recent_join["channel"]
350
-
351
- # score segments: more negative (discount/cost) net shap → candidate for improvement
352
- cand_cols = [c for c in key_feats if ("discount" in c or "cost" in c or "price" in c)]
353
- seg_scores = recent_join.groupby("key")[cand_cols].mean().sum(axis=1)
354
- worst_keys = seg_scores.sort_values().head(20).index.tolist()
355
-
356
- recs = []
357
- seen = set()
358
- for key in worst_keys:
359
- p, r, c = key.split("|")
360
- if key in seen:
361
- continue
362
- seen.add(key)
363
- hist = df[(df["product"]==p)&(df["region"]==r)&(df["channel"]==c)].sort_values("date")
364
- if hist.empty:
365
- continue
366
- eps, _ = estimate_segment_elasticity(hist, p, r, c)
367
- # propose tightening discount a bit; sample 1.0–2.0 pts
368
- prop_disc_pts = -np.clip(abs(seg_scores[key])*10, 0.5, 2.0)
369
- sim = simulate_action(hist, eps, delta_discount=prop_disc_pts/100.0, delta_unit_cost=0.0)
370
- if sim is None:
371
- continue
372
- recs.append({
373
- "segment": f"{p} • {r} • {c}",
374
- "action": f"Reduce discount by {abs(prop_disc_pts):.1f} pts",
375
- "expected_gm_uplift": sim["gm_delta_value"],
376
- "new_discount_pct": sim["new_discount"]*100,
377
- "elasticity": eps,
378
- "notes": "Driven by negative discount/price SHAP"
379
- })
380
-
381
- rec_df = pd.DataFrame(recs).sort_values("expected_gm_uplift", ascending=False)
382
- st.dataframe(rec_df.head(15), use_container_width=True)
383
-
384
- st.download_button("⬇️ Download recommendations (CSV)",
385
- data=rec_df.to_csv(index=False).encode("utf-8"),
386
- file_name="gm_recommendations.csv",
387
- mime="text/csv")
388
 
389
  st.markdown("---")
390
- st.caption("Demo only — synthetic data & simplified economics. For production, plug in your CDS feed and business constraints.")
 
1
+
2
  import streamlit as st
3
  import numpy as np
4
  import pandas as pd
 
21
  # 1) Synthetic data generation
22
  # -----------------------------
23
  @st.cache_data(show_spinner=False)
24
+ def generate_synthetic_data(days=60, seed=42, rows_per_day=600):
25
  rng = np.random.default_rng(seed)
26
  start_date = datetime.today().date() - timedelta(days=days)
27
  dates = pd.date_range(start_date, periods=days, freq="D")
 
83
  # Quantity via elasticity around a reference price
84
  eps = np.array([seg_epsilon[(pp, rr, cc)] for pp, rr, cc in zip(prod, reg, ch)])
85
  ref_price = np.array([base_price[x] for x in prod])
 
86
  qty_mu = np.exp(eps * (net_price - ref_price) / np.maximum(ref_price, 1e-6))
87
  qty = np.maximum(1, rng.poisson(8 * dow_mult * macro * qty_mu))
88
 
 
115
  # 2) Modeling utilities
116
  # -----------------------------
117
  def build_features(df: pd.DataFrame):
 
118
  feats_num = ["net_price", "unit_cost", "qty", "discount_pct", "list_price", "dow"]
119
  feats_cat = ["product", "region", "channel"]
120
 
 
121
  df = df.sort_values("date").copy()
122
  seg = ["product", "region", "channel"]
123
  df["price_per_unit"] = df["net_price"]
 
132
  return df, feats_num, feats_cat, target
133
 
134
  @st.cache_resource(show_spinner=False)
135
+ def train_model(df: pd.DataFrame, feats_num, feats_cat, target, n_estimators=250):
136
  X = df[feats_num + feats_cat]
137
  y = df[target]
138
 
 
142
  ("num", "passthrough", feats_num),
143
  ]
144
  )
145
+ model = RandomForestRegressor(n_estimators=n_estimators, max_depth=None, random_state=42, n_jobs=-1, min_samples_leaf=3)
146
  pipe = Pipeline([("pre", pre), ("rf", model)])
147
 
148
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)
 
152
  r2 = r2_score(y_test, pred)
153
  mae = mean_absolute_error(y_test, pred)
154
 
155
+ # Store components for SHAP later (on-demand)
156
+ return pipe, {"r2": r2, "mae": mae}, X_test
157
+
158
+ @st.cache_resource(show_spinner=False)
159
+ def compute_shap(pipe, X_sample, feats_num, feats_cat, shap_sample=800, seed=42):
160
+ np.random.seed(seed)
161
  preproc = pipe.named_steps["pre"]
162
  rf = pipe.named_steps["rf"]
163
  feature_names = list(preproc.named_transformers_["cat"].get_feature_names_out(feats_cat)) + feats_num
164
 
165
+ if len(X_sample) > shap_sample:
166
+ sample_idx = np.random.choice(len(X_sample), size=shap_sample, replace=False)
167
+ X_sample = X_sample.iloc[sample_idx]
168
+
169
+ X_t = preproc.transform(X_sample)
170
+ try:
171
+ X_t = X_t.toarray()
172
+ except Exception:
173
+ pass
174
+
175
  explainer = shap.TreeExplainer(rf)
176
+ shap_values = explainer.shap_values(X_t)
177
  expected_value = explainer.expected_value
178
 
179
  shap_df = pd.DataFrame(shap_values, columns=feature_names)
180
+ return shap_df, expected_value, X_sample.reset_index(drop=True), feature_names
 
 
 
181
 
182
  def estimate_segment_elasticity(df: pd.DataFrame, product, region, channel):
183
  seg_df = df[(df["product"]==product)&(df["region"]==region)&(df["channel"]==channel)]
 
184
  if len(seg_df) < 100 or seg_df["net_price"].std() < 1e-6 or seg_df["qty"].std() < 1e-6:
185
  return -0.5, False
186
  x = np.log(np.clip(seg_df["net_price"].values, 1e-6, None)).reshape(-1,1)
187
  y = np.log(np.clip(seg_df["qty"].values, 1e-6, None))
188
  lin = LinearRegression().fit(x, y)
 
189
  return float(lin.coef_[0]), True
190
 
191
  def simulate_action(segment_df: pd.DataFrame, elasticity, delta_discount=0.0, delta_unit_cost=0.0):
 
 
192
  if segment_df.empty:
193
  return None
194
  base = segment_df.iloc[-1]
 
196
  c0 = base["unit_cost"]
197
  q0 = base["qty"]
198
  d0 = base["discount_pct"]
199
+
200
  new_discount = np.clip(d0 + delta_discount, 0.0, 0.45)
201
  p1 = max(0.01, base["list_price"] * (1 - new_discount))
202
  c1 = max(0.01, c0 + delta_unit_cost)
203
 
 
204
  if p0 <= 0:
205
  q1 = q0
206
  else:
 
231
 
232
  with st.sidebar:
233
  st.header("⚙️ Controls")
234
+ fast_mode = st.toggle("Fast mode (recommended on Spaces)", value=True)
235
+ if fast_mode:
236
+ days = st.slider("History (days)", 30, 120, 60, 1)
237
+ rows_per_day = st.slider("Rows per day", 300, 2000, 600, 100)
238
+ else:
239
+ days = st.slider("History (days)", 45, 180, 90, 1)
240
+ rows_per_day = st.slider("Rows per day", 300, 3000, 1200, 100)
241
+
242
  seed = st.number_input("Random seed", value=42, step=1)
243
  st.markdown("---")
244
  st.markdown("**Training**")
245
+ n_trees = st.slider("RandomForest trees", 100, 600, 250 if fast_mode else 400, 50)
246
  st.caption("Model: RandomForestRegressor (SHAP via TreeExplainer)")
247
  st.markdown("---")
248
+ st.markdown("**SHAP computation**")
249
+ shap_sample = st.slider("SHAP sample size", 200, 3000, 800 if fast_mode else 1800, 100)
250
+ st.markdown("---")
251
  st.markdown("**What-if Defaults**")
252
  default_disc_step = st.slider("Default discount step (points)", -5.0, 5.0, -1.5, 0.1)
253
  default_cost_step = st.slider("Default unit cost change", -5.0, 5.0, 0.0, 0.1)
 
277
  st.plotly_chart(fig, use_container_width=True)
278
 
279
  # Train
280
+ with st.spinner("Training model…"):
281
+ pipe, metrics, X_test = train_model(df_feat, feats_num, feats_cat, target, n_estimators=int(n_trees))
282
 
283
  st.success(f"Model trained • R²={metrics['r2']:.3f} • MAE={metrics['mae']:.4f} (GM% points)")
284
 
285
+ # SHAP: compute on demand
286
  st.subheader("🔍 Driver Analysis (Global)")
287
+ if "shap_imp_df" not in st.session_state:
288
+ st.session_state["shap_imp_df"] = None
289
+ if "shap_joined" not in st.session_state:
290
+ st.session_state["shap_joined"] = None
291
+
292
+ compute_now = st.button("Compute / Refresh SHAP drivers")
293
+ if compute_now or st.session_state["shap_imp_df"] is None:
294
+ with st.spinner("Computing SHAP (sampled)"):
295
+ shap_df, expected_value, X_test_sample, feature_names = compute_shap(pipe, X_test, feats_num, feats_cat, shap_sample=int(shap_sample))
296
+ mean_abs = shap_df.abs().mean().sort_values(ascending=False)
297
+ imp_df = pd.DataFrame({"feature": mean_abs.index, "mean_abs_shap": mean_abs.values})
298
+ st.session_state["shap_imp_df"] = imp_df
299
+ # Keep a joined frame for segment view
300
+ cat_cols = ["product","region","channel"]
301
+ joined = pd.concat([X_test_sample.reset_index(drop=True), shap_df.reset_index(drop=True)], axis=1)
302
+ st.session_state["shap_joined"] = joined
303
+
304
+ imp_df = st.session_state["shap_imp_df"]
305
+ if imp_df is not None:
306
+ st.dataframe(imp_df.head(15), use_container_width=True)
307
+ fig2, ax = plt.subplots(figsize=(8,5))
308
+ imp_df.head(20).iloc[::-1].plot(kind="barh", x="feature", y="mean_abs_shap", ax=ax)
309
+ ax.set_title("Top Drivers — Mean |SHAP| (GM%)")
310
+ ax.set_xlabel("Mean |SHAP| contribution")
311
+ st.pyplot(fig2, clear_figure=True)
312
+ else:
313
+ st.info("Click **Compute / Refresh SHAP drivers** to see driver importance.")
314
+
315
+ # Segment analysis
316
  st.subheader("🧭 Where did it happen? (Segment view)")
317
+ joined = st.session_state["shap_joined"]
318
+ if joined is not None:
319
+ key_feats = [c for c in joined.columns if any(k in c for k in ["discount", "price_per_unit", "cost_per_unit","unit_cost","net_price"])]
320
+ grp = joined.groupby(["product","region","channel"]).mean(numeric_only=True)
321
+ rank_cols = [c for c in grp.columns if c in key_feats]
322
+ top_bad = grp[rank_cols].sum(axis=1).sort_values().head(10)
323
+ top_good = grp[rank_cols].sum(axis=1).sort_values(ascending=False).head(10)
324
+
325
+ c1, c2 = st.columns(2)
326
+ with c1:
327
+ st.caption("Segments dragging GM% (more negative net SHAP)")
328
+ st.write(top_bad.to_frame("net_shap_sum").round(4))
329
+ with c2:
330
+ st.caption("Segments lifting GM% (more positive net SHAP)")
331
+ st.write(top_good.to_frame("net_shap_sum").round(4))
332
+ else:
333
+ st.info("Compute SHAP first to populate the segment view.")
334
 
335
  # -----------------------------
336
  # What-if Simulator
337
  # -----------------------------
338
+ st.header("🧪 What-if Simulator & Recommendations")
339
 
 
340
  last_day = df["date"].max()
341
  seg_today = df[df["date"]==last_day][["product","region","channel"]].drop_duplicates().sort_values(["product","region","channel"])
342
  seg_choice = st.selectbox("Choose a segment (product × region × channel):",
 
350
 
351
  c3, c4 = st.columns(2)
352
  with c3:
353
+ delta_disc = st.slider("Change discount (percentage points)", -10.0, 10.0, -1.5, 0.1)
354
  with c4:
355
+ delta_cost = st.slider("Change unit cost (absolute)", -5.0, 5.0, 0.0, 0.1)
356
 
357
  sim_res = simulate_action(seg_hist, elasticity, delta_discount=delta_disc/100.0, delta_unit_cost=delta_cost)
358
 
 
371
  # Auto Recommendations
372
  # -----------------------------
373
  st.subheader("💡 Top Recommendations (ranked by expected uplift)")
374
+ if joined is not None:
375
+ recent_join = joined.copy()
376
+ recent_join["key"] = recent_join["product"] + "|" + recent_join["region"] + "|" + recent_join["channel"]
377
+ cand_cols = [c for c in recent_join.columns if ("discount" in c or "cost" in c or "price" in c)]
378
+ seg_scores = recent_join.groupby("key")[cand_cols].mean().sum(axis=1)
379
+ worst_keys = seg_scores.sort_values().head(20).index.tolist()
380
+
381
+ recs = []
382
+ seen = set()
383
+ for key in worst_keys:
384
+ p, r, c = key.split("|")
385
+ if key in seen:
386
+ continue
387
+ seen.add(key)
388
+ hist = df[(df["product"]==p)&(df["region"]==r)&(df["channel"]==c)].sort_values("date")
389
+ if hist.empty:
390
+ continue
391
+ eps, _ = estimate_segment_elasticity(hist, p, r, c)
392
+ prop_disc_pts = -np.clip(abs(seg_scores[key])*10, 0.5, 2.0) # propose 0.5–2.0 pts tightening
393
+ sim = simulate_action(hist, eps, delta_discount=prop_disc_pts/100.0, delta_unit_cost=0.0)
394
+ if sim is None:
395
+ continue
396
+ recs.append({
397
+ "segment": f"{p} {r} • {c}",
398
+ "action": f"Reduce discount by {abs(prop_disc_pts):.1f} pts",
399
+ "expected_gm_uplift": sim["gm_delta_value"],
400
+ "new_discount_pct": sim["new_discount"]*100,
401
+ "elasticity": eps,
402
+ "notes": "Driven by negative discount/price SHAP"
403
+ })
404
+
405
+ rec_df = pd.DataFrame(recs).sort_values("expected_gm_uplift", ascending=False)
406
+ st.dataframe(rec_df.head(15), use_container_width=True)
407
+ st.download_button("⬇️ Download recommendations (CSV)",
408
+ data=rec_df.to_csv(index=False).encode("utf-8"),
409
+ file_name="gm_recommendations.csv",
410
+ mime="text/csv")
411
+ else:
412
+ st.info("Compute SHAP first to generate recommendation candidates.")
 
 
 
413
 
414
  st.markdown("---")
415
+ st.caption("Demo only — synthetic data & simplified economics. For production, plug in your CDS feed and business constraints.")
requirements.txt CHANGED
@@ -1,7 +1,8 @@
 
1
  streamlit==1.37.1
2
  pandas==2.2.2
3
  numpy==1.26.4
4
  scikit-learn==1.4.2
5
  shap==0.45.1
6
  matplotlib==3.9.0
7
- plotly==5.22.0
 
1
+
2
  streamlit==1.37.1
3
  pandas==2.2.2
4
  numpy==1.26.4
5
  scikit-learn==1.4.2
6
  shap==0.45.1
7
  matplotlib==3.9.0
8
+ plotly==5.22.0