Spaces:
Running
Running
# app.py | |
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import plotly.express as px | |
import os | |
import warnings | |
warnings.filterwarnings('ignore') | |
# ---------- Page & Styles ---------- | |
st.set_page_config(page_title="SAP Sales KPI Dashboard", page_icon="π", layout="wide") | |
st.markdown(""" | |
<style> | |
/* hide default sidebar entirely */ | |
[data-testid="stSidebar"] { display: none; } | |
.main-header { font-size: 2.2rem; font-weight: 800; color: #1f4e79; text-align: left; margin-bottom: .25rem; } | |
.subtle { color:#6b7280; margin-bottom:1.25rem; } | |
.filter-card { background:#f8f9fa; padding: .9rem 1rem; border-radius:12px; border:1px solid #edf2f7; } | |
.kpi-card { background: #ffffff; padding: 1.25rem; border-radius: 14px; border:1px solid #e5e7eb; box-shadow: 0 1px 2px rgba(0,0,0,.03); } | |
.kpi-value { font-size: 2.1rem; font-weight: 800; color: #1f4e79; line-height:1; } | |
.kpi-label { font-size: .95rem; color: #6b7280; } | |
.block-container { padding-top: 1.2rem; } | |
</style> | |
""", unsafe_allow_html=True) | |
# ---------- Kaggle load ---------- | |
def load_kaggle_sap_data(): | |
try: | |
if 'KAGGLE_USERNAME' not in st.secrets or 'KAGGLE_KEY' not in st.secrets: | |
return "Kaggle credentials not found in Streamlit secrets." | |
os.environ['KAGGLE_USERNAME'] = st.secrets['KAGGLE_USERNAME'] | |
os.environ['KAGGLE_KEY'] = st.secrets['KAGGLE_KEY'] | |
import kaggle | |
dataset_name = "mustafakeser4/sap-dataset-bigquery-dataset" | |
download_path = "./kaggle_data" | |
if not os.path.exists(os.path.join(download_path, 'vbak.csv')): | |
with st.spinner("Downloading dataset from Kaggle..."): | |
kaggle.api.authenticate() | |
kaggle.api.dataset_download_files(dataset_name, path=download_path, unzip=True) | |
needed = {'vbak': 'vbak.csv', 'vbap': 'vbap.csv', 'kna1': 'kna1.csv', 'makt': 'makt.csv'} | |
tables = {} | |
for k, fn in needed.items(): | |
fp = os.path.join(download_path, fn) | |
if not os.path.exists(fp): | |
return f"Expected file missing: {fn}" | |
tables[k] = pd.read_csv(fp, low_memory=False) | |
return tables | |
except Exception as e: | |
return f"Error during Kaggle data loading: {e}" | |
# ---------- Processing (robust revenue + safe merges) ---------- | |
def create_sales_analytics(_tables): | |
try: | |
vbak = _tables['vbak'].copy() | |
vbap = _tables['vbap'].copy() | |
kna1 = _tables['kna1'].copy() | |
makt = _tables['makt'].copy() | |
# normalize column names | |
for df in [vbak, vbap, kna1, makt]: | |
df.columns = [c.upper().strip() for c in df.columns] | |
# keep SAP keys as strings (avoid leading-zero loss) | |
def as_str(df, cols): | |
for c in cols: | |
if c in df.columns: | |
df[c] = df[c].astype(str).str.strip() | |
as_str(vbak, ['VBELN','KUNNR','VKORG','VTWEG']) | |
as_str(vbap, ['VBELN','MATNR']) | |
as_str(kna1, ['KUNNR']) | |
as_str(makt, ['MATNR']) | |
# choose numeric helper | |
def pick_numeric(df, cols): | |
for c in cols: | |
if c in df.columns: | |
s = pd.to_numeric(df[c], errors='coerce') | |
if s.notna().sum() > 0 and s.abs().sum() > 0: | |
return s | |
return pd.Series(0.0, index=df.index) | |
# Build item-level REVENUE | |
# primary: NETWR at item level (VBAP) | |
netwr_item = pick_numeric(vbap, ['NETWR']) | |
# fallback: price * qty using common SAP columns | |
price = pick_numeric(vbap, ['NETPR', 'KBETR', 'NETPR_I']) | |
qty = pick_numeric(vbap, ['KWMENG', 'KTMNG', 'MENGE']) | |
fallback_rev = (price.fillna(0) * qty.fillna(0)).fillna(0) | |
vbap['REVENUE'] = np.where(netwr_item > 0, netwr_item, fallback_rev).astype(float) | |
# header fields (include currency if present) | |
keep_vbak = ['VBELN','KUNNR','VKORG','VTWEG','ERDAT'] + (['WAERK'] if 'WAERK' in vbak.columns else []) | |
vbak_small = vbak[keep_vbak].drop_duplicates('VBELN') | |
vbap_small = vbap[['VBELN','MATNR','REVENUE']] | |
kna1_small = kna1[['KUNNR','NAME1','LAND1']] if {'KUNNR','NAME1','LAND1'}.issubset(kna1.columns) else pd.DataFrame(columns=['KUNNR','NAME1','LAND1']) | |
# product text in English | |
makt_en = makt[makt['SPRAS'].eq('E')] if 'SPRAS' in makt.columns else makt | |
makt_small = makt_en[['MATNR','MAKTX']].drop_duplicates('MATNR') if {'MATNR','MAKTX'}.issubset(makt_en.columns) else pd.DataFrame(columns=['MATNR','MAKTX']) | |
# final sales table | |
sales = (vbap_small | |
.merge(vbak_small, on='VBELN', how='inner') | |
.merge(kna1_small, on='KUNNR', how='left') | |
.merge(makt_small, on='MATNR', how='left')) | |
if 'ERDAT' in sales.columns: | |
sales['ERDAT'] = pd.to_datetime(sales['ERDAT'], errors='coerce') | |
# ensure columns exist | |
for col in ['REVENUE','LAND1','VTWEG','NAME1','MAKTX','VBELN','KUNNR','VKORG']: | |
if col not in sales.columns: | |
sales[col] = np.nan if col != 'REVENUE' else 0.0 | |
if 'WAERK' not in sales.columns: | |
sales['WAERK'] = 'N/A' | |
# drop obvious junk rows | |
sales = sales.replace([np.inf, -np.inf], np.nan).dropna(subset=['REVENUE']) | |
return sales | |
except Exception as e: | |
return f"Error processing sales data: {e}" | |
# ---------- App ---------- | |
st.markdown('<div class="main-header">π― SAP Sales KPI Dashboard</div><div class="subtle">Real SAP ERP sample data (Kaggle)</div>', unsafe_allow_html=True) | |
tables = load_kaggle_sap_data() | |
if isinstance(tables, str): | |
st.error(tables) | |
st.stop() | |
sales_df = create_sales_analytics(tables) | |
if isinstance(sales_df, str): | |
st.error(sales_df) | |
st.stop() | |
# ---------- Filter Bar (no sidebar) ---------- | |
with st.container(): | |
st.markdown('<div class="filter-card">', unsafe_allow_html=True) | |
c1, c2, c3, c4 = st.columns([1.2, 1.2, 3, 0.9]) | |
# currency filter | |
currencies = [c for c in sales_df['WAERK'].dropna().unique().tolist() if c != 'N/A'] | |
default_cur = sales_df['WAERK'].mode().iat[0] if len(sales_df) and sales_df['WAERK'].notna().any() else 'N/A' | |
with c1: | |
currency = st.selectbox("Currency", options=(['All'] + sorted(currencies)) if currencies else ['All'], index=0 if currencies else 0) | |
# Top N | |
with c2: | |
top_n_countries = st.slider("Top N Countries", 5, 20, 10) | |
# Region multiselect inside expander to keep tidy | |
with c3: | |
with st.expander("Region (Country) β click to choose", expanded=False): | |
all_countries = sorted(sales_df['LAND1'].dropna().unique().tolist()) | |
# buttons to select/clear | |
b1, b2 = st.columns([1,1]) | |
if 'selected_countries' not in st.session_state: | |
st.session_state.selected_countries = all_countries | |
with b1: | |
if st.button("Select All"): | |
st.session_state.selected_countries = all_countries | |
with b2: | |
if st.button("Clear"): | |
st.session_state.selected_countries = [] | |
selected_region = st.multiselect("Countries", options=all_countries, default=st.session_state.selected_countries, key="countries_ms") | |
with c4: | |
if st.button("π Clear Cache"): | |
st.cache_data.clear() | |
st.rerun() | |
st.markdown('</div>', unsafe_allow_html=True) | |
# apply filters | |
filtered_df = sales_df.copy() | |
if currency and currency != 'All': | |
filtered_df = filtered_df[filtered_df['WAERK'] == currency] | |
if 'countries_ms' in st.session_state: | |
filtered_df = filtered_df[filtered_df['LAND1'].isin(st.session_state.countries_ms)] | |
st.success(f"β Loaded and processed {len(filtered_df):,} sales line-items after filters.") | |
# ---------- KPIs ---------- | |
st.subheader("Sales KPIs") | |
k1,k2,k3,k4 = st.columns(4) | |
with k1: st.markdown(f'<div class="kpi-card"><div class="kpi-value">${float(filtered_df["REVENUE"].sum()):,.0f}</div><div class="kpi-label">Total Revenue</div></div>', unsafe_allow_html=True) | |
with k2: st.markdown(f'<div class="kpi-card"><div class="kpi-value">{int(filtered_df["KUNNR"].nunique())}</div><div class="kpi-label">Active Customers</div></div>', unsafe_allow_html=True) | |
with k3: | |
aov = float(filtered_df.loc[filtered_df['REVENUE']>0,'REVENUE'].mean() or 0.0) | |
st.markdown(f'<div class="kpi-card"><div class="kpi-value">${aov:,.0f}</div><div class="kpi-label">Avg Order Value (item)</div></div>', unsafe_allow_html=True) | |
with k4: st.markdown(f'<div class="kpi-card"><div class="kpi-value">{int(filtered_df["VBELN"].nunique())}</div><div class="kpi-label">Sales Orders</div></div>', unsafe_allow_html=True) | |
# ---------- Tabs ---------- | |
tab1, tab2, tab3, tab4 = st.tabs(["π₯ Top Customers", "π Regional Analysis", "π Distribution Channels", "ποΈ Top Products"]) | |
with tab1: | |
st.subheader("Top 10 Customers by Revenue") | |
customer_summary = (filtered_df.dropna(subset=['NAME1']) | |
.groupby('NAME1', as_index=False)['REVENUE'].sum() | |
.nlargest(10, 'REVENUE')) | |
if customer_summary.empty: | |
st.info("No customer data to display.") | |
else: | |
fig = px.bar(customer_summary, x='REVENUE', y='NAME1', orientation='h', | |
labels={'REVENUE':'Revenue','NAME1':'Customer'}, color='REVENUE') | |
st.plotly_chart(fig.update_layout(yaxis={'categoryorder':'total ascending'}), use_container_width=True) | |
with tab2: | |
st.subheader("Revenue by Country") | |
regional_summary = (filtered_df.dropna(subset=['LAND1']) | |
.groupby('LAND1', as_index=False)['REVENUE'].sum() | |
.nlargest(top_n_countries, 'REVENUE')) | |
if regional_summary.empty: | |
st.info("No country data to display.") | |
else: | |
fig = px.pie(regional_summary, values='REVENUE', names='LAND1', | |
title=f"Top {top_n_countries} Countries by Revenue") | |
st.plotly_chart(fig, use_container_width=True) | |
with tab3: | |
st.subheader("Revenue by Distribution Channel") | |
channel_summary = (filtered_df.dropna(subset=['VTWEG']) | |
.groupby('VTWEG', as_index=False)['REVENUE'].sum()) | |
channel_summary['VTWEG'] = channel_summary['VTWEG'].astype(str) | |
if channel_summary.empty: | |
st.info("No distribution channel data to display.") | |
else: | |
fig = px.bar(channel_summary, x='VTWEG', y='REVENUE', | |
title="Total Revenue by Distribution Channel", | |
labels={'REVENUE':'Total Revenue','VTWEG':'Distribution Channel'}, | |
color='REVENUE') | |
st.plotly_chart(fig, use_container_width=True) | |
with tab4: | |
st.subheader("Top 10 Products by Revenue") | |
product_summary = (filtered_df.dropna(subset=['MAKTX']) | |
.groupby('MAKTX', as_index=False)['REVENUE'].sum() | |
.nlargest(10, 'REVENUE')) | |
if product_summary.empty: | |
st.info("No product data to display.") | |
else: | |
fig = px.bar(product_summary, x='REVENUE', y='MAKTX', orientation='h', | |
labels={'REVENUE':'Revenue','MAKTX':'Product'}, color='REVENUE') | |
st.plotly_chart(fig.update_layout(yaxis={'categoryorder':'total ascending'}), use_container_width=True) | |
st.markdown("---") | |
st.markdown("<p style='text-align:center;'>Built with Streamlit β’ Real SAP ERP sample data (Kaggle)</p>", unsafe_allow_html=True) | |