db_query / apps /parameters_distribution.py
DavMelchi's picture
Add percentage column and use plotly
e976fb1
import pandas as pd
import plotly.express as px
import streamlit as st
from pyxlsb import open_workbook
st.title("πŸ“Š Parameters distribution Analyzer")
uploaded_file = st.file_uploader("Upload an .xlsb Dump file", type="xlsb")
if uploaded_file:
# Get sheet names
with open_workbook(uploaded_file) as wb:
sheet_names = wb.sheets
# Dropdown for Object Class (sheet)
object_class = st.selectbox("Select Object Class (Sheet)", sheet_names)
if object_class:
# Read the selected sheet, skip first row
df: pd.DataFrame = pd.read_excel(
uploaded_file, sheet_name=object_class, skiprows=[0], engine="calamine"
)
if df.empty:
st.warning("The selected sheet is empty or couldn't be read.")
else:
parameters = st.multiselect("Select Parameter(s)", df.columns)
if parameters:
for param in parameters:
st.markdown(f"---\n### πŸ”Ή {param}")
col1, col2 = st.columns(2)
# Distribution table
with col1:
dist = df[param].value_counts(dropna=False).reset_index()
dist.columns = [param, "Count"]
dist["Percentage"] = (
dist["Count"] / dist["Count"].sum() * 100
).apply(lambda x: f"{x:.2f}%")
st.dataframe(dist, use_container_width=True)
# Bar chart
with col2:
chart_data = dist.set_index(param)
fig = px.bar(
chart_data.reset_index(),
x=param,
y="Count",
text_auto=True,
title=f"{param} Distribution",
hover_data=["Count", "Percentage"],
)
fig.update_xaxes(type="category")
st.plotly_chart(fig)