import os
import time
import json
import math
import random
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import pandas as pd
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
from streamlit_option_menu import option_menu
from faker import Faker
from datetime import datetime, timedelta

# =============================
# Page / Theme Configuration
# =============================
st.set_page_config(
    page_title="SAP S/4HANA Agentic AI Procurement Analytics",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded",
)

# --- CSS ---
# NOTE: the original stylesheet content is not included here; add custom rules as needed.
st.markdown(
    """
    <style>
    /* custom styles */
    </style>
    """,
    unsafe_allow_html=True,
)

# =============================
# Config & LLM Client (robust, version-agnostic)
# =============================
@dataclass
class LLMConfig:
    provider: str = os.getenv("LLM_PROVIDER", "openai").lower()  # openai | azure | compatible
    base_url: Optional[str] = os.getenv("OPENAI_BASE_URL")  # for OpenAI-compatible endpoints
    api_key: Optional[str] = (
        os.getenv("OPENAI_API_KEY")
        or os.getenv("OPENAI_API_TOKEN")
        or os.getenv("OPENAI_KEY")
    )
    model: str = os.getenv("OPENAI_MODEL", "gpt-4o-mini")
    timeout: int = int(os.getenv("OPENAI_TIMEOUT", "45"))
    max_retries: int = int(os.getenv("OPENAI_MAX_RETRIES", "5"))
    temperature: float = float(os.getenv("OPENAI_TEMPERATURE", "0.6"))


def _post_json(url: str, headers: Dict[str, str], payload: Dict[str, Any], timeout: int):
    import requests

    return requests.post(url, headers=headers, json=payload, timeout=timeout)


class UniversalLLMClient:
    """A resilient client that works with OpenAI, Azure OpenAI, and compatible APIs.

    - Prefers /chat/completions
    - Falls back to /responses if available
    - Retries with exponential backoff and respects Retry-After
    """

    def __init__(self, cfg: LLMConfig):
        self.cfg = cfg
        self.available = bool(cfg.api_key)
        self.last_error: Optional[str] = None
        if self.available:
            self._smoke_test()

    def _headers(self) -> Dict[str, str]:
        if self.cfg.provider == "azure":
            # Azure OpenAI authenticates with an "api-key" header rather than a Bearer token
            return {"api-key": self.cfg.api_key, "Content-Type": "application/json"}
        return {"Authorization": f"Bearer {self.cfg.api_key}", "Content-Type": "application/json"}

    def _base_url(self) -> str:
        if self.cfg.provider == "azure":
            # Use Azure env format if provided; Azure addresses a deployment, not a model
            endpoint = (os.getenv("AZURE_OPENAI_ENDPOINT") or "").rstrip("/")
            deployment = os.getenv("AZURE_OPENAI_DEPLOYMENT", self.cfg.model)
            return f"{endpoint}/openai/deployments/{deployment}"
        return (self.cfg.base_url or "https://api.openai.com/v1").rstrip("/")

    def _smoke_test(self):
        try:
            _ = self.chat([{"role": "user", "content": "ping"}], max_tokens=4)
        except Exception as e:
            self.available = False
            self.last_error = str(e)

    def chat(self, messages: List[Dict[str, str]], max_tokens: int = 400) -> str:
        if not self.available:
            raise RuntimeError("No API key configured")
        headers = self._headers()
        base = self._base_url()

        # Endpoint selection: Azure carries the API version as a query parameter
        if self.cfg.provider == "azure":
            api_version = os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-15-preview")
            chat_url = f"{base}/chat/completions?api-version={api_version}"
        else:
            chat_url = f"{base}/chat/completions"
        responses_url = f"{base}/responses"

        payload = {
            "model": self.cfg.model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": self.cfg.temperature,
        }

        # Retry with exponential backoff
        delay = 1.0
        for attempt in range(self.cfg.max_retries):
            try:
                resp = _post_json(chat_url, headers, payload, self.cfg.timeout)
                if resp.status_code == 200:
                    data = resp.json()
                    return data["choices"][0]["message"]["content"].strip()

                # Try the /responses fallback for some providers
                if resp.status_code in (404, 400):
                    alt = _post_json(
                        responses_url,
                        headers,
                        {
                            "model": self.cfg.model,
                            "input": messages,
                            "max_output_tokens": max_tokens,
                            "temperature": self.cfg.temperature,
                        },
                        self.cfg.timeout,
                    )
                    if alt.status_code == 200:
                        return alt.json()["output"][0]["content"][0]["text"].strip()

                if resp.status_code in (429, 500, 502, 503, 504):
                    retry_after = float(resp.headers.get("Retry-After", delay))
                    time.sleep(retry_after)
                    delay = min(delay * 2, 8.0)
                    continue

                # Other errors → raise
                try:
                    j = resp.json()
                    msg = j.get("error", {}).get("message", str(j))
                except Exception:
                    msg = resp.text
                raise RuntimeError(f"API error {resp.status_code}: {msg}")
            except Exception as e:
                if attempt == self.cfg.max_retries - 1:
                    self.last_error = str(e)
                    raise
                time.sleep(delay)
                delay = min(delay * 2, 8.0)
        raise RuntimeError("Exhausted retries")


# =============================
# Data Generation & Utils
# =============================
@st.cache_data(show_spinner=False)
def generate_synthetic_procurement_data(seed: int = 42) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Generate richer synthetic SAP S/4HANA procurement data, including lead times and late flags."""
    fake = Faker()
    np.random.seed(seed)
    random.seed(seed)

    vendors = [
        "Siemens AG", "BASF SE", "BMW Group", "Mercedes-Benz", "Bosch GmbH",
        "ThyssenKrupp", "Bayer AG", "Continental AG", "Henkel AG", "SAP SE",
    ]
    categories = [
        "Raw Materials", "Components", "Packaging", "Services",
        "IT Equipment", "Office Supplies", "Machinery", "Chemicals",
    ]

    purchase_orders: List[Dict[str, Any]] = []
    today = datetime.utcnow().date()

    for i in range(900):
        order_date = fake.date_between(start_date='-24m', end_date='today')
        promised_days = random.randint(3, 30)
        promised_date = order_date + timedelta(days=promised_days)
        actual_lag = max(1, int(np.random.normal(promised_days, 5)))
        delivery_date = order_date + timedelta(days=actual_lag)
        late = delivery_date > promised_date
        unit_price = round(random.uniform(10, 500), 2)
        qty = random.randint(1, 1200)
        order_value = round(unit_price * qty, 2)

        po = {
            'po_number': f"PO{str(i+1).zfill(6)}",
            'vendor': random.choice(vendors),
            'material_category': random.choice(categories),
            'order_date': order_date,
            'promised_date': promised_date,
            'delivery_date': delivery_date,
            'lead_time_days': (delivery_date - order_date).days,
            'promised_days': promised_days,
            'late_delivery': late,
            'order_value': order_value,
            'quantity': qty,
            'unit_price': unit_price,
            'status': random.choice(['Open', 'Delivered', 'Invoiced', 'Paid']),
            'plant': random.choice(['Plant_001', 'Plant_002', 'Plant_003']),
            'buyer': fake.name(),
            'currency': 'EUR',
            'payment_terms': random.choice(['30 Days', '45 Days', '60 Days', '90 Days']),
            'quality_score': round(np.clip(np.random.normal(8.5, 0.8), 5.0, 10.0), 1),
        }
        purchase_orders.append(po)

    spend_rows = []
    for v in vendors:
        for c in categories:
            spend_rows.append({
                'vendor': v,
                'category': c,
                'total_spend': round(random.uniform(10000, 700000), 2),
                'contract_compliance': round(random.uniform(78, 100), 1),
                'risk_score': round(random.uniform(1, 10), 1),
                'savings_potential': round(random.uniform(5, 25), 1),
            })

    po_df = pd.DataFrame(purchase_orders)
    spend_df = pd.DataFrame(spend_rows)
    return po_df, spend_df


def eur(x: float) -> str:
    return f"€{x:,.0f}"


# =============================
# Analytics Engine
# =============================
class ProcurementAnalytics:
    def __init__(self, po_df: pd.DataFrame):
        self.df = po_df.copy()
        self.df['order_date'] = pd.to_datetime(self.df['order_date'])
        self.df['month'] = self.df['order_date'].dt.to_period('M').dt.to_timestamp()

    @st.cache_data(show_spinner=False)
    def kpis(_self, df_hash: int) -> Dict[str, Any]:
        df = _self.df
        return {
            'total_spend': float(df['order_value'].sum()),
            'avg_order_value': float(df['order_value'].mean()),
            'active_vendors': int(df['vendor'].nunique()),
            'on_time_rate': float((~df['late_delivery']).mean()),
            'quality_avg': float(df['quality_score'].mean()),
        }

    def category_spend(self) -> pd.DataFrame:
        return (
            self.df.groupby('material_category', as_index=False)['order_value'].sum()
            .sort_values('order_value', ascending=False)
        )

    def vendor_spend(self, top_n: int = 8) -> pd.DataFrame:
        g = self.df.groupby('vendor', as_index=False)['order_value'].sum()
        return g.sort_values('order_value', ascending=False).head(top_n)

    def monthly_spend(self) -> pd.DataFrame:
        return self.df.groupby('month', as_index=False)['order_value'].sum().sort_values('month')

    def vendor_performance(self) -> pd.DataFrame:
        g = self.df.groupby('vendor').agg(
            total_spend=('order_value', 'sum'),
            on_time=('late_delivery', lambda s: 1 - s.mean()),
            quality=('quality_score', 'mean'),
            orders=('po_number', 'count'),
            lead_time=('lead_time_days', 'mean'),
        )
        g['on_time'] = (g['on_time'] * 100).round(1)
        g['quality'] = g['quality'].round(2)
        g['lead_time'] = g['lead_time'].round(1)
        g['total_spend'] = g['total_spend'].round(2)
        return g.sort_values('total_spend', ascending=False)

    def anomalies(self) -> pd.DataFrame:
        # Simple IQR rule for order_value anomalies
        q1, q3 = self.df['order_value'].quantile([0.25, 0.75])
        iqr = q3 - q1
        hi = q3 + 1.5 * iqr
        lo = max(0, q1 - 1.5 * iqr)
        a = self.df[(self.df['order_value'] > hi) | (self.df['order_value'] < lo)].copy()
        a['anomaly_reason'] = np.where(a['order_value'] > hi, 'High value', 'Low value')
        return a.sort_values('order_value', ascending=False).head(50)

    def simulate_vendor_consolidation(self, keep_top: int) -> Dict[str, Any]:
        g = self.df.groupby('vendor')['order_value'].sum().sort_values(ascending=False)
        kept_vendors = list(g.head(keep_top).index)
        kept_spend = self.df[self.df['vendor'].isin(kept_vendors)]['order_value'].sum()
        total_spend = self.df['order_value'].sum()
        share = kept_spend / total_spend if total_spend else 0
        est_savings = 0.05 + (0.12 * (1 - share))  # heuristic: better leverage when consolidating
        return {
            'kept_vendors': kept_vendors,
            'kept_share': share,
            'estimated_savings_pct': max(0.03, min(0.18, est_savings)),
        }


# =============================
# Agent (uses UniversalLLMClient with safe fallback)
# =============================
class UniversalProcurementAgent:
    def __init__(self, po_df: pd.DataFrame, spend_df: pd.DataFrame, client: UniversalLLMClient):
        self.po_data = po_df
        self.spend_data = spend_df
        self.llm = client

    def llm_status(self) -> Dict[str, Any]:
        return {
            "api_key_available": bool(self.llm.cfg.api_key),
            "llm_available": self.llm.available,
            "last_error": self.llm.last_error or ("Connected successfully" if self.llm.available else "Unavailable"),
            "provider": self.llm.cfg.provider,
            "model": self.llm.cfg.model,
            "base_url": self.llm.cfg.base_url or "https://api.openai.com/v1",
        }

    def _rule_summary(self) -> str:
        total_spend = float(self.po_data['order_value'].sum())
        on_time = float((~self.po_data['late_delivery']).mean()) * 100
        quality = float(self.po_data['quality_score'].mean())
        top_cat = self.po_data.groupby('material_category')['order_value'].sum().idxmax()
        top_vendor = self.po_data.groupby('vendor')['order_value'].sum().idxmax()
        return (
            "🤖 **[Smart Analysis - Rule-Based Engine]**\n"
            "**Executive Snapshot**\n"
            f"• Total spend: {eur(total_spend)} across {len(self.po_data):,} POs\n"
            f"• On-time delivery: {on_time:.1f}% • Avg quality: {quality:.1f}/10\n"
            f"• Top category: {top_cat} • Lead vendor: {top_vendor}\n\n"
            "**Opportunities**\n"
            "• Consolidate long-tail vendors to improve pricing power (5–12% potential).\n"
            "• Tighten SLAs for late deliveries and extend performance-based contracts.\n"
            "• Automate low-value buys to reduce cycle time."
        )

    def executive_summary(self) -> str:
        if not self.llm.available:
            return self._rule_summary()
        data_summary = {
            "total_spend": float(self.po_data['order_value'].sum()),
            "total_orders": int(len(self.po_data)),
            "vendor_count": int(self.po_data['vendor'].nunique()),
            "avg_order_value": float(self.po_data['order_value'].mean()),
            "on_time_delivery": float((~self.po_data['late_delivery']).mean()),
            "avg_quality": float(self.po_data['quality_score'].mean()),
        }
        messages = [
            {"role": "system", "content": "You are a senior procurement analyst with expertise in SAP S/4HANA. Be concise, metric-driven, and actionable."},
            {"role": "user", "content": (
                "Create an executive summary covering: 1) overview (2-3 sentences), 2) KPI highlights, 3) risks/alerts, 4) 3-4 strategic recommendations with quantified impact.\n"
                f"Data: {json.dumps(data_summary)}"
            )},
        ]
        try:
            return "🧠 **[AI-Powered Analysis]**\n\n" + self.llm.chat(messages, max_tokens=650)
        except Exception as e:
            return self._rule_summary() + f"\n\n*AI fallback due to: {e}*"

    def chat_with_data(self, question: str) -> str:
        if not self.llm.available:
            return self._rule_answer(question)
        context = {
            "total_spend": float(self.po_data['order_value'].sum()),
            "orders": int(len(self.po_data)),
            "vendors": int(self.po_data['vendor'].nunique()),
            "on_time": float((~self.po_data['late_delivery']).mean()),
            "quality": float(self.po_data['quality_score'].mean()),
        }
        messages = [
            {"role": "system", "content": "You are an expert procurement co-pilot. Use the provided context and respond with precise metrics and concrete actions."},
            {"role": "user", "content": f"Question: {question}\nContext: {json.dumps(context)}"},
        ]
        try:
            return "🧠 **[AI Response]**\n\n" + self.llm.chat(messages, max_tokens=450)
        except Exception as e:
            return self._rule_answer(question) + f"\n\n*AI fallback due to: {e}*"

    def _rule_answer(self, question: str) -> str:
        q = question.lower()
        if any(w in q for w in ["spend", "cost", "budget"]):
            total = float(self.po_data['order_value'].sum())
            monthly = total / max(1, self.po_data['order_date'].nunique() / 30)
            top_cat = self.po_data.groupby('material_category')['order_value'].sum().idxmax()
            return (
                "🤖 **[Smart Analysis] Spend**\n"
                f"• Total spend: {eur(total)}\n"
                f"• Monthly average (approx): {eur(monthly)}\n"
                f"• Top category: {top_cat}\n"
                "Tip: prioritize competitive events for the top 2 categories to unlock 4–8% savings."
            )
        if any(w in q for w in ["vendor", "supplier", "partner"]):
            vp = self.po_data.groupby('vendor').agg(
                spend=('order_value', 'sum'),
                on_time=('late_delivery', lambda s: 1 - s.mean()),
            ).sort_values('spend', ascending=False).head(1)
            top = vp.index[0]
            on_time = float(vp.iloc[0]['on_time']) * 100
            return (
                "🤖 **[Smart Analysis] Vendor**\n"
                f"• Top vendor: {top} • On-time: {on_time:.1f}%\n"
                "Action: lock in volume tiers and add delivery penalties to the contract."
            )
        if any(w in q for w in ["risk", "late", "delay"]):
            late_rate = float(self.po_data['late_delivery'].mean()) * 100
            return (
                "🤖 **[Smart Analysis] Risk**\n"
                f"• Late delivery rate: {late_rate:.1f}%\n"
                "Action: add buffer to planning lead times and escalate chronic late suppliers."
            )
        return (
            "🤖 **[Smart Analysis]** I can help with spend, vendor performance, risk, savings, and trends. "
            "Try: \"Where can I save 10%?\""
        )
# =============================
# App State & Initialization
# =============================
if 'data_loaded' not in st.session_state:
    with st.spinner('🔄 Generating synthetic SAP S/4HANA procurement data...'):
        st.session_state.po_df, st.session_state.spend_df = generate_synthetic_procurement_data()
    st.session_state.data_loaded = True


@st.cache_resource(show_spinner=False)
def get_llm_client() -> UniversalLLMClient:
    return UniversalLLMClient(LLMConfig())


client = get_llm_client()
agent = UniversalProcurementAgent(st.session_state.po_df, st.session_state.spend_df, client)
analytics = ProcurementAnalytics(st.session_state.po_df)

status = agent.llm_status()
api_status = "🟢 Connected" if status['llm_available'] else "🔴 Not Connected"

# =============================
# Header
# =============================
st.markdown(
    f"""
    <div>
        <h1>🤖 SAP S/4HANA Agentic AI Procurement Analytics</h1>
        <p>Autonomous Intelligence for Procurement Excellence</p>
        <p>OpenAI: {api_status} · Data: {len(st.session_state.po_df):,} POs</p>
    </div>
    """,
    unsafe_allow_html=True,
)

# =============================
# Sidebar
# =============================
with st.sidebar:
    st.markdown("### 🤖 AI System Status")
    st.markdown(f"**Connection:** {api_status}")
    st.markdown(f"**Provider:** {status['provider']}")
    st.markdown(f"**Model:** {status['model']}")

    with st.expander("🔍 System Information"):
        safe = status.copy()
        # Do not expose the API key
        st.json({k: v for k, v in safe.items() if k != 'api_key'})

    if st.button("🔄 Test AI Connection"):
        if status['llm_available']:
            st.success("LLM is reachable and ready.")
        else:
            st.error(f"LLM unavailable: {status['last_error']}")

    st.markdown("---")

    selected = option_menu(
        "Navigation",
        ["🏠 Dashboard", "💬 AI Chat", "📊 Analytics", "🧪 What‑If", "🎯 Recommendations"],
        icons=['house', 'chat', 'bar-chart', 'beaker', 'target'],
        menu_icon="cast",
        default_index=0,
        styles={
            "container": {"padding": "0!important", "background-color": "#fafafa"},
            "icon": {"color": "#0066cc", "font-size": "18px"},
            "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px", "--hover-color": "#eee"},
            "nav-link-selected": {"background-color": "#0066cc"},
        },
    )

# =============================
# Main Views
# =============================
if selected == "🏠 Dashboard":
    st.markdown("### 🧠 AI Executive Summary")
    with st.spinner('🤖 Analyzing procurement data...'):
        summary = agent.executive_summary()
    st.markdown(
        f"""
        <div>
            <h4>📊 Intelligent Analysis</h4>
            {summary}
        </div>
        """,
        unsafe_allow_html=True,
    )

    k = analytics.kpis(hash(tuple(st.session_state.po_df['po_number'])))
    c1, c2, c3, c4 = st.columns(4)
    with c1:
        st.markdown(
            f"""
            <div>
                <h4>Total Spend</h4>
                <h2>{eur(k['total_spend'])}</h2>
                <p>📈 Active Portfolio</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with c2:
        st.markdown(
            f"""
            <div>
                <h4>Avg Order Value</h4>
                <h2>{eur(k['avg_order_value'])}</h2>
                <p>📊 Order Efficiency</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with c3:
        st.markdown(
            f"""
            <div>
                <h4>Active Vendors</h4>
                <h2>{k['active_vendors']}</h2>
                <p>🤝 Strategic Partners</p>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with c4:
        st.markdown(
            f"""
            <div>
                <h4>On‑Time Delivery</h4>
                <h2>{k['on_time_rate']*100:.1f}%</h2>
                <p>⏱ Performance</p>
            </div>
            """,
            unsafe_allow_html=True,
        )

    st.markdown("### 📊 Executive Dashboard")
    colA, colB = st.columns(2)
    with colA:
        cat = analytics.category_spend()
        fig = px.pie(cat, values='order_value', names='material_category', title='Spend Distribution by Category')
        fig.update_layout(title_font_size=16, title_x=0.5, height=420)
        st.plotly_chart(fig, use_container_width=True)
    with colB:
        vend = analytics.vendor_spend(top_n=8)
        fig2 = px.bar(vend, x='vendor', y='order_value', title='Top Vendors by Spend')
        fig2.update_layout(title_font_size=16, title_x=0.5, xaxis_tickangle=45, height=420)
        st.plotly_chart(fig2, use_container_width=True)

    colC, colD = st.columns(2)
    with colC:
        ms = analytics.monthly_spend()
        fig3 = px.line(ms, x='month', y='order_value', markers=True, title='Monthly Spend Trend')
        fig3.update_layout(title_font_size=16, title_x=0.5, height=420)
        st.plotly_chart(fig3, use_container_width=True)
    with colD:
        ano = analytics.anomalies()
        st.markdown("#### 🔎 High/Low Value Anomalies (Top 50)")
        st.dataframe(
            ano[['po_number', 'vendor', 'material_category', 'order_value', 'anomaly_reason']].reset_index(drop=True),
            use_container_width=True,
            height=380,
        )

elif selected == "💬 AI Chat":
    st.markdown("### 💬 Chat with Your Procurement Data")
    st.markdown(
        f"""
        <div>
            <h4>🤖 Universal AI Assistant</h4>
            <p>Ask me anything about your procurement data! I'm provider-agnostic and resilient to API versions.</p>
            <p>Status: {api_status} | Provider: {status['provider']} | Model: {status['model']}</p>
        </div>
        """,
        unsafe_allow_html=True,
    )

    if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "assistant", "content": "Hello! I loaded your data and I'm ready to help—try asking about spend, vendors, or risk."}
        ]

    for m in st.session_state.messages:
        with st.chat_message(m["role"]):
            st.markdown(m["content"])

    if prompt := st.chat_input("Ask about your procurement data…"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)
        with st.chat_message("assistant"):
            with st.spinner("🤖 Analyzing…"):
                reply = agent.chat_with_data(prompt)
                st.markdown(reply)
        st.session_state.messages.append({"role": "assistant", "content": reply})

    st.markdown("#### 💡 Try quick questions:")
    c1, c2, c3 = st.columns(3)
    qs = ["What are my biggest spending areas?", "How are my vendors performing?", "Where can I save 10%?"]
    for i, (c, q) in enumerate(zip([c1, c2, c3], qs)):
        with c:
            if st.button(f"💭 {q}", key=f"q_{i}"):
                st.session_state.messages.append({"role": "user", "content": q})
                st.session_state.messages.append({"role": "assistant", "content": agent.chat_with_data(q)})
                st.rerun()

elif selected == "📊 Analytics":
    st.markdown("### 📈 Advanced Analytics Dashboard")
    vp = analytics.vendor_performance()
    st.dataframe(
        vp.rename(columns={
            'total_spend': 'Total Spend (€)',
            'on_time': 'On-Time Delivery %',
            'quality': 'Quality Score',
            'orders': 'Order Count',
            'lead_time': 'Avg Lead Time (days)',
        }),
        use_container_width=True,
    )
    st.download_button(
        label="⬇️ Download Vendor Performance (CSV)",
        data=vp.to_csv().encode('utf-8'),
        file_name="vendor_performance.csv",
        mime="text/csv",
    )

elif selected == "🧪 What‑If":
    st.markdown("### 🧪 What‑If: Vendor Consolidation Simulator")
    top_n = st.slider("Keep top N vendors by spend", min_value=2, max_value=10, value=6, step=1)
    sim = analytics.simulate_vendor_consolidation(keep_top=top_n)
    kept_names = ", ".join(sim['kept_vendors'])
    st.markdown(
        f"""
        <div>
            <p>Scenario: Keep top {top_n} vendors. Estimated addressable spend share: {sim['kept_share']*100:.1f}%.</p>
            <p>Potential savings: {sim['estimated_savings_pct']*100:.1f}% (heuristic).</p>
            <p>Kept Vendors: {kept_names}</p>
        </div>
        """,
        unsafe_allow_html=True,
    )

    if st.checkbox("Show detailed vendor spend"):
        st.dataframe(analytics.vendor_spend(top_n=999), use_container_width=True)

elif selected == "🎯 Recommendations":
    st.markdown("### 🚀 Strategic Recommendations")
    recs = [
        "🎯 **Vendor Consolidation**: Reduce long-tail suppliers; target 8–15% price improvement via volume tiers.",
        "⚡ **Process Automation**: Auto-approve low-value POs to cut cycle time by 35–50%.",
        "📊 **Performance Contracts**: KPI-linked clauses for on-time delivery; add service credits for misses.",
        "🛡️ **Risk Monitoring**: Score suppliers on late rate, quality, and concentration; escalate chronic offenders.",
        "🧠 **AI Copilot**: Use LLM to draft RFQs, summarize bids, and propose award scenarios.",
    ]
    for i, rec in enumerate(recs, start=1):
        st.markdown(
            f"""
            <div>
                <h4>Recommendation #{i}</h4>
                {rec}
            </div>
            """,
            unsafe_allow_html=True,
        )

# =============================
# Footer
# =============================
st.markdown("---")
st.markdown(
    f"""
    <div>
        <p>🤖 Universal AI Procurement Analytics | Provider‑agnostic LLM integration with resilient fallbacks</p>
        <p>Demo with synthetic data • {len(st.session_state.po_df):,} orders • OpenAI {api_status}</p>
    </div>
    """,
    unsafe_allow_html=True,
)