Spaces:

PD03
/

SALT_Dashboard

Running

File size: 11,589 Bytes

a5de76b
82197ce
73a7361
a5de76b
fcccb84
f7dbc16
82197ce
 
fcccb84
ec9c243
 
82197ce
 
ec9c243
 
 
 
 
 
 
 
 
82197ce
 
 
ec9c243
4303c4f
82197ce
6b2600f
06e3d05
73f080a
06e3d05
 
a5de76b
f7dbc16
f325816
f7dbc16
06e3d05
4303c4f
ec9c243
73f080a
 
06e3d05
ec9c243
f7dbc16
ec9c243
 
 
 
 
1611d3c
6b2600f
73f080a
6b2600f
ec9c243
4303c4f
 
6b2600f
4303c4f
 
 
 
06e3d05
ec9c243
4303c4f
a5de76b
 
ec9c243
 
a5de76b
 
 
 
ec9c243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5de76b
ec9c243
 
 
 
 
 
 
 
 
a5de76b
ec9c243
 
 
 
 
 
 
a5de76b
 
 
 
ec9c243
 
a5de76b
ec9c243
 
 
a5de76b
ec9c243
 
a5de76b
6b2600f
73f080a
82197ce
ec9c243
 
 
 
 
 
73f080a
 
ec9c243
73f080a
 
 
 
ec9c243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73f080a
 
 
 
ec9c243
 
 
 
 
a5de76b
ec9c243
 
 
73f080a
 
 
ec9c243
 
 
 
 
a5de76b
ec9c243
 
 
73f080a
 
 
ec9c243
 
73f080a
ec9c243
 
a5de76b
ec9c243
 
 
 
 
73f080a
 
 
ec9c243
 
 
 
 
a5de76b
ec9c243
 
 
73f080a
 
ec9c243

# app.py
import streamlit as st
import pandas as pd
import numpy as np
import plotly.express as px
import os
import warnings
warnings.filterwarnings('ignore')

# ---------- Page & Styles ----------
st.set_page_config(page_title="SAP Sales KPI Dashboard", page_icon="📊", layout="wide")
st.markdown("""
<style>
  /* hide default sidebar entirely */
  [data-testid="stSidebar"] { display: none; }
  .main-header { font-size: 2.2rem; font-weight: 800; color: #1f4e79; text-align: left; margin-bottom: .25rem; }
  .subtle { color:#6b7280; margin-bottom:1.25rem; }
  .filter-card { background:#f8f9fa; padding: .9rem 1rem; border-radius:12px; border:1px solid #edf2f7; }
  .kpi-card { background: #ffffff; padding: 1.25rem; border-radius: 14px; border:1px solid #e5e7eb; box-shadow: 0 1px 2px rgba(0,0,0,.03); }
  .kpi-value { font-size: 2.1rem; font-weight: 800; color: #1f4e79; line-height:1; }
  .kpi-label { font-size: .95rem; color: #6b7280; }
  .block-container { padding-top: 1.2rem; }
</style>
""", unsafe_allow_html=True)

# ---------- Kaggle load ----------
@st.cache_data(ttl=3600)
def load_kaggle_sap_data():
    try:
        if 'KAGGLE_USERNAME' not in st.secrets or 'KAGGLE_KEY' not in st.secrets:
            return "Kaggle credentials not found in Streamlit secrets."
        os.environ['KAGGLE_USERNAME'] = st.secrets['KAGGLE_USERNAME']
        os.environ['KAGGLE_KEY'] = st.secrets['KAGGLE_KEY']

        import kaggle
        dataset_name = "mustafakeser4/sap-dataset-bigquery-dataset"
        download_path = "./kaggle_data"

        if not os.path.exists(os.path.join(download_path, 'vbak.csv')):
            with st.spinner("Downloading dataset from Kaggle..."):
                kaggle.api.authenticate()
                kaggle.api.dataset_download_files(dataset_name, path=download_path, unzip=True)

        needed = {'vbak': 'vbak.csv', 'vbap': 'vbap.csv', 'kna1': 'kna1.csv', 'makt': 'makt.csv'}
        tables = {}
        for k, fn in needed.items():
            fp = os.path.join(download_path, fn)
            if not os.path.exists(fp):
                return f"Expected file missing: {fn}"
            tables[k] = pd.read_csv(fp, low_memory=False)
        return tables
    except Exception as e:
        return f"Error during Kaggle data loading: {e}"

# ---------- Processing (robust revenue + safe merges) ----------
@st.cache_data
def create_sales_analytics(_tables):
    try:
        vbak = _tables['vbak'].copy()
        vbap = _tables['vbap'].copy()
        kna1 = _tables['kna1'].copy()
        makt = _tables['makt'].copy()

        # normalize column names
        for df in [vbak, vbap, kna1, makt]:
            df.columns = [c.upper().strip() for c in df.columns]

        # keep SAP keys as strings (avoid leading-zero loss)
        def as_str(df, cols):
            for c in cols:
                if c in df.columns:
                    df[c] = df[c].astype(str).str.strip()

        as_str(vbak, ['VBELN','KUNNR','VKORG','VTWEG'])
        as_str(vbap, ['VBELN','MATNR'])
        as_str(kna1, ['KUNNR'])
        as_str(makt, ['MATNR'])

        # choose numeric helper
        def pick_numeric(df, cols):
            for c in cols:
                if c in df.columns:
                    s = pd.to_numeric(df[c], errors='coerce')
                    if s.notna().sum() > 0 and s.abs().sum() > 0:
                        return s
            return pd.Series(0.0, index=df.index)

        # Build item-level REVENUE
        # primary: NETWR at item level (VBAP)
        netwr_item = pick_numeric(vbap, ['NETWR'])

        # fallback: price * qty using common SAP columns
        price = pick_numeric(vbap, ['NETPR', 'KBETR', 'NETPR_I'])
        qty   = pick_numeric(vbap, ['KWMENG', 'KTMNG', 'MENGE'])
        fallback_rev = (price.fillna(0) * qty.fillna(0)).fillna(0)

        vbap['REVENUE'] = np.where(netwr_item > 0, netwr_item, fallback_rev).astype(float)

        # header fields (include currency if present)
        keep_vbak = ['VBELN','KUNNR','VKORG','VTWEG','ERDAT'] + (['WAERK'] if 'WAERK' in vbak.columns else [])
        vbak_small = vbak[keep_vbak].drop_duplicates('VBELN')
        vbap_small = vbap[['VBELN','MATNR','REVENUE']]
        kna1_small = kna1[['KUNNR','NAME1','LAND1']] if {'KUNNR','NAME1','LAND1'}.issubset(kna1.columns) else pd.DataFrame(columns=['KUNNR','NAME1','LAND1'])

        # product text in English
        makt_en = makt[makt['SPRAS'].eq('E')] if 'SPRAS' in makt.columns else makt
        makt_small = makt_en[['MATNR','MAKTX']].drop_duplicates('MATNR') if {'MATNR','MAKTX'}.issubset(makt_en.columns) else pd.DataFrame(columns=['MATNR','MAKTX'])

        # final sales table
        sales = (vbap_small
                 .merge(vbak_small, on='VBELN', how='inner')
                 .merge(kna1_small, on='KUNNR', how='left')
                 .merge(makt_small, on='MATNR', how='left'))

        if 'ERDAT' in sales.columns:
            sales['ERDAT'] = pd.to_datetime(sales['ERDAT'], errors='coerce')

        # ensure columns exist
        for col in ['REVENUE','LAND1','VTWEG','NAME1','MAKTX','VBELN','KUNNR','VKORG']:
            if col not in sales.columns:
                sales[col] = np.nan if col != 'REVENUE' else 0.0
        if 'WAERK' not in sales.columns:
            sales['WAERK'] = 'N/A'

        # drop obvious junk rows
        sales = sales.replace([np.inf, -np.inf], np.nan).dropna(subset=['REVENUE'])
        return sales
    except Exception as e:
        return f"Error processing sales data: {e}"

# ---------- App ----------
st.markdown('<div class="main-header">🎯 SAP Sales KPI Dashboard</div><div class="subtle">Real SAP ERP sample data (Kaggle)</div>', unsafe_allow_html=True)

tables = load_kaggle_sap_data()
if isinstance(tables, str):
    st.error(tables)
    st.stop()

sales_df = create_sales_analytics(tables)
if isinstance(sales_df, str):
    st.error(sales_df)
    st.stop()

# ---------- Filter Bar (no sidebar) ----------
with st.container():
    st.markdown('<div class="filter-card">', unsafe_allow_html=True)
    c1, c2, c3, c4 = st.columns([1.2, 1.2, 3, 0.9])

    # currency filter
    currencies = [c for c in sales_df['WAERK'].dropna().unique().tolist() if c != 'N/A']
    default_cur = sales_df['WAERK'].mode().iat[0] if len(sales_df) and sales_df['WAERK'].notna().any() else 'N/A'
    with c1:
        currency = st.selectbox("Currency", options=(['All'] + sorted(currencies)) if currencies else ['All'], index=0 if currencies else 0)

    # Top N
    with c2:
        top_n_countries = st.slider("Top N Countries", 5, 20, 10)

    # Region multiselect inside expander to keep tidy
    with c3:
        with st.expander("Region (Country) – click to choose", expanded=False):
            all_countries = sorted(sales_df['LAND1'].dropna().unique().tolist())
            # buttons to select/clear
            b1, b2 = st.columns([1,1])
            if 'selected_countries' not in st.session_state:
                st.session_state.selected_countries = all_countries
            with b1:
                if st.button("Select All"):
                    st.session_state.selected_countries = all_countries
            with b2:
                if st.button("Clear"):
                    st.session_state.selected_countries = []
            selected_region = st.multiselect("Countries", options=all_countries, default=st.session_state.selected_countries, key="countries_ms")

    with c4:
        if st.button("🔄 Clear Cache"):
            st.cache_data.clear()
            st.rerun()
    st.markdown('</div>', unsafe_allow_html=True)

# apply filters
filtered_df = sales_df.copy()
if currency and currency != 'All':
    filtered_df = filtered_df[filtered_df['WAERK'] == currency]
if 'countries_ms' in st.session_state:
    filtered_df = filtered_df[filtered_df['LAND1'].isin(st.session_state.countries_ms)]

st.success(f"✅ Loaded and processed {len(filtered_df):,} sales line-items after filters.")

# ---------- KPIs ----------
st.subheader("Sales KPIs")
k1,k2,k3,k4 = st.columns(4)
with k1: st.markdown(f'<div class="kpi-card"><div class="kpi-value">${float(filtered_df["REVENUE"].sum()):,.0f}</div><div class="kpi-label">Total Revenue</div></div>', unsafe_allow_html=True)
with k2: st.markdown(f'<div class="kpi-card"><div class="kpi-value">{int(filtered_df["KUNNR"].nunique())}</div><div class="kpi-label">Active Customers</div></div>', unsafe_allow_html=True)
with k3:
    aov = float(filtered_df.loc[filtered_df['REVENUE']>0,'REVENUE'].mean() or 0.0)
    st.markdown(f'<div class="kpi-card"><div class="kpi-value">${aov:,.0f}</div><div class="kpi-label">Avg Order Value (item)</div></div>', unsafe_allow_html=True)
with k4: st.markdown(f'<div class="kpi-card"><div class="kpi-value">{int(filtered_df["VBELN"].nunique())}</div><div class="kpi-label">Sales Orders</div></div>', unsafe_allow_html=True)

# ---------- Tabs ----------
tab1, tab2, tab3, tab4 = st.tabs(["👥 Top Customers", "🌍 Regional Analysis", "📈 Distribution Channels", "🛍️ Top Products"])

with tab1:
    st.subheader("Top 10 Customers by Revenue")
    customer_summary = (filtered_df.dropna(subset=['NAME1'])
                        .groupby('NAME1', as_index=False)['REVENUE'].sum()
                        .nlargest(10, 'REVENUE'))
    if customer_summary.empty:
        st.info("No customer data to display.")
    else:
        fig = px.bar(customer_summary, x='REVENUE', y='NAME1', orientation='h',
                     labels={'REVENUE':'Revenue','NAME1':'Customer'}, color='REVENUE')
        st.plotly_chart(fig.update_layout(yaxis={'categoryorder':'total ascending'}), use_container_width=True)

with tab2:
    st.subheader("Revenue by Country")
    regional_summary = (filtered_df.dropna(subset=['LAND1'])
                        .groupby('LAND1', as_index=False)['REVENUE'].sum()
                        .nlargest(top_n_countries, 'REVENUE'))
    if regional_summary.empty:
        st.info("No country data to display.")
    else:
        fig = px.pie(regional_summary, values='REVENUE', names='LAND1',
                     title=f"Top {top_n_countries} Countries by Revenue")
        st.plotly_chart(fig, use_container_width=True)

with tab3:
    st.subheader("Revenue by Distribution Channel")
    channel_summary = (filtered_df.dropna(subset=['VTWEG'])
                       .groupby('VTWEG', as_index=False)['REVENUE'].sum())
    channel_summary['VTWEG'] = channel_summary['VTWEG'].astype(str)
    if channel_summary.empty:
        st.info("No distribution channel data to display.")
    else:
        fig = px.bar(channel_summary, x='VTWEG', y='REVENUE',
                     title="Total Revenue by Distribution Channel",
                     labels={'REVENUE':'Total Revenue','VTWEG':'Distribution Channel'},
                     color='REVENUE')
        st.plotly_chart(fig, use_container_width=True)

with tab4:
    st.subheader("Top 10 Products by Revenue")
    product_summary = (filtered_df.dropna(subset=['MAKTX'])
                       .groupby('MAKTX', as_index=False)['REVENUE'].sum()
                       .nlargest(10, 'REVENUE'))
    if product_summary.empty:
        st.info("No product data to display.")
    else:
        fig = px.bar(product_summary, x='REVENUE', y='MAKTX', orientation='h',
                     labels={'REVENUE':'Revenue','MAKTX':'Product'}, color='REVENUE')
        st.plotly_chart(fig.update_layout(yaxis={'categoryorder':'total ascending'}), use_container_width=True)

st.markdown("---")
st.markdown("<p style='text-align:center;'>Built with Streamlit • Real SAP ERP sample data (Kaggle)</p>", unsafe_allow_html=True)