# Lars Masanneck
# Restore weight label in biomarker mapping
# c7aa9c1
import streamlit as st
import normalizer_model
import numpy as np
import pandas as pd
import altair as alt
import plotly.graph_objects as go
from scipy.stats import norm
# Page configuration is issued ahead of every other Streamlit command below.
st.set_page_config(
    layout="wide",
    page_title="Smartwatch Normative Z-Score Calculator",
)
# Loader for the normative table, cached through Streamlit's data cache.
@st.cache_data
def load_norm_df(path: str):
    """Return the normative table that ``normalizer_model`` loads from ``path``."""
    return normalizer_model.load_normative_table(path)
# Normative dataset, loaded once at module level and read as a global by main().
_NORM_TABLE_CSV = "Table_1_summary_measure.csv"
norm_df = load_norm_df(_NORM_TABLE_CSV)
# Display names for biomarker codes. Codes without an entry here are shown
# title-cased by the biomarker selector in main().
BIOMARKER_LABELS = {
    # Activity
    "nb_steps": "Number of Steps",
    "max_steps": "Maximum Steps",
    "mean_active_time": "Mean Active Time",
    # Blood pressure
    "sbp": "Systolic Blood Pressure",
    "dbp": "Diastolic Blood Pressure",
    # Sleep and night-time heart rate
    "sleep_duration": "Sleep Duration",
    "avg_night_hr": "Average Night Heart Rate",
    # Activity minutes by intensity
    "nb_moderate_active_minutes": "Moderate Active Minutes",
    "nb_vigorous_active_minutes": "Vigorous Active Minutes",
    # Other measurements
    "weight": "Weight",
    "pwv": "Pulse Wave Velocity",
    # Extend with further code -> label pairs as needed.
}

# Biomarker codes currently hidden from the selector; delete a code from this
# set to make it selectable again.
DISABLED_BIOMARKERS = {"weight"}
def _age_group_sort_key(label):
    """Return a sort key for an age-group label based on its leading number.

    Labels such as ``"18-29"`` sort by their lower bound (18). A label that
    does not start with ASCII digits sorts after every numeric label instead
    of raising, which a bare ``int(label.split("-")[0])`` would do for
    open-ended or otherwise irregular labels.
    """
    digits = ""
    for ch in str(label).strip():
        if ch not in "0123456789":
            break
        digits += ch
    return int(digits) if digits else float("inf")


def _skew_interpretation(skew):
    """Map a Pearson skewness value to the label shown in the checks table."""
    magnitude = abs(skew)
    if magnitude <= 0.1:
        return "Symmetric (OK)"
    direction = "Right" if skew > 0 else "Left"
    if magnitude <= 0.5:
        return f"{direction} slight skew (usually OK)"
    if magnitude <= 1.0:
        return f"{direction} noticeable skew"
    return f"{direction} strong skew"


def _render_summary(res, region, gender, age_param, bmi_param):
    """Render headline metrics, the cohort description and the statistics table.

    ``res`` is the mapping returned by
    ``normalizer_model.compute_normative_position``. ``z_score``,
    ``percentile``, ``mean`` and ``sd`` are required; every other statistic is
    optional and falls back to ``N/A`` / ``nan`` in the output.
    """
    # One lookup for the sample size so the metric, the cohort sentence and
    # the table all tolerate a missing "n" in the same way.
    sample_size = res.get("n", "N/A")

    st.subheader("Results")
    m1, m2, m3, m4, m5 = st.columns(5)
    m1.metric("Z-Score", f"{res['z_score']:.2f}")
    m2.metric("Percentile", f"{res['percentile']:.2f}")
    m3.metric("Mean", f"{res['mean']:.2f}")
    m4.metric("SD", f"{res['sd']:.2f}")
    m5.metric("Sample Size", sample_size)

    # Resolve the cohort labels for the raw age / BMI inputs.
    age_group_str = normalizer_model._categorize_age(age_param, norm_df)
    bmi_category = normalizer_model.categorize_bmi(bmi_param)
    # Each value carries its own bold markers; previously one unbalanced pair
    # bolded the BMI category together with the whole sample-size sentence.
    st.markdown(
        f"**Basis of calculation:** Data from region **{region}**, "
        f"gender **{gender}**, age group **{age_group_str}**, "
        f"and BMI category **{bmi_category}**. "
        f"Sample size: **{sample_size}**."
    )

    def optional_stat(key):
        # Optional statistics are rendered as "nan" when absent.
        return f"{res.get(key, float('nan')):.2f}"

    st.subheader("Detailed Statistics")
    rows = [
        ("Z-Score", f"{res['z_score']:.2f}"),
        ("Percentile", f"{res['percentile']:.2f}"),
        ("Mean", f"{res['mean']:.2f}"),
        ("SD", f"{res['sd']:.2f}"),
        ("Sample Size", sample_size),
        ("Median", optional_stat("median")),
        ("Q1", optional_stat("q1")),
        ("Q3", optional_stat("q3")),
        ("IQR", optional_stat("iqr")),
        ("MAD", optional_stat("mad")),
        ("SE", optional_stat("se")),
        ("CI", optional_stat("ci")),
    ]
    st.table(pd.DataFrame(rows, columns=["Statistic", "Value"]))

    st.write(
        "*Note: Percentile and z-score estimation assume a normal "
        "distribution based on global Withings user data stratified by "
        "the parameters entered.*"
    )


def _render_normality_checks(res):
    """Render the normality heuristics table, its legend and the warning.

    Requires ``iqr``, ``q1``, ``q3`` and ``median`` in ``res``; when any is
    missing a notice is shown instead of failing with ``KeyError``.
    """
    st.subheader("Normality Heuristics")

    missing = [key for key in ("iqr", "q1", "q3", "median") if key not in res]
    if missing:
        st.info(
            "Normality heuristics are unavailable for this cohort "
            f"(missing statistics: {', '.join(missing)})."
        )
        return

    # Kept as a function-level import, as before, so the module is only
    # needed once results are actually rendered.
    import normality_checks as nc

    tail_ratio = nc.iqr_tail_heaviness(res["iqr"], res["sd"])
    q1_z, q3_z = nc.quartile_z_scores(
        res["mean"],
        res["sd"],
        res["q1"],
        res["q3"],
    )
    skew = nc.pearson_skewness(res["mean"], res["median"], res["sd"])

    if tail_ratio > 1.5:
        tail_flag = "Heavier tails"
    elif tail_ratio < 1.2:
        tail_flag = "Lighter tails"
    else:
        tail_flag = "OK"

    norm_checks = pd.DataFrame(
        {
            "Check": [
                "IQR/SD",
                "Q1 z-score",
                "Q3 z-score",
                "Pearson Skewness",
            ],
            "Value": [
                f"{tail_ratio:.2f}",
                f"{q1_z:.2f}",
                f"{q3_z:.2f}",
                f"{skew:.2f}",
            ],
            "Flag": [
                tail_flag,
                # Quartile z-scores are compared against -0.6745 / +0.6745.
                "Deviation" if abs(q1_z + 0.6745) > 0.1 else "OK",
                "Deviation" if abs(q3_z - 0.6745) > 0.1 else "OK",
                _skew_interpretation(skew),
            ],
        }
    )
    st.table(norm_checks)

    # Legend for the skewness row (same text as the original literal).
    st.markdown(
        "\n"
        "**Pearson Skewness Interpretation:**\n"
        "- ≈ 0: Symmetric distribution\n"
        "- ±0.1 to ±0.5: Slight/moderate skew\n"
        "- ±0.5 to ±1: Noticeable skew\n"
        "- larger than±1: Strong skew\n"
        "- Positive values: Right skew (longer tail on right)\n"
        "- Negative values: Left skew (longer tail on left)\n"
    )

    # Substring test: "Symmetric (OK)" and "... slight skew (usually OK)"
    # count as passing, every other flag triggers the warning.
    if any("OK" not in flag for flag in norm_checks["Flag"]):
        st.warning(
            "Warning: Heuristic checks indicate possible deviations "
            "from normality; interpret z-score and percentiles with "
            "caution."
        )


def _render_skew_corrected(value, res):
    """Render the optional expander with skew-corrected z-score and percentile."""
    with st.expander("Optional: Skew-Corrected Results"):
        st.write("Adjusts for skew via Pearson Type III back-transform.")
        st.write("Error often <1 percentile point when |skew| ≤ 0.5.")
        st.write("Usually more useful for stronger skewed distributions.")
        st.write("Note: This is a heuristic and may not always be accurate.")

        # The correction needs the median; explain instead of raising KeyError.
        if "median" not in res:
            st.write("Skew-corrected results are unavailable: no median reported.")
            return

        res_skew = normalizer_model.compute_skew_corrected_position(
            value=value,
            mean=res["mean"],
            sd=res["sd"],
            median=res["median"],
        )
        sc1, sc2 = st.columns(2)
        sc1.metric(
            "Skew-Corrected Z-Score",
            f"{res_skew['z_skew_corrected']:.2f}",
        )
        sc2.metric(
            "Skew-Corrected Percentile",
            f"{res_skew['percentile_skew_corrected']:.2f}",
        )


def _render_visualizations(res):
    """Render the standard-normal chart, the text summary and the bullet gauge."""
    st.markdown("---")
    st.subheader("Visualizations")

    z_score = res["z_score"]

    # Standard normal density sampled on [-4, 4].
    z_vals = np.linspace(-4, 4, 400)
    df_chart = pd.DataFrame({"z": z_vals, "density": norm.pdf(z_vals)})

    # Shaded area up to the observed z-score.
    area = (
        alt.Chart(df_chart)
        .mark_area(color="orange", opacity=0.3)
        .transform_filter(alt.datum.z <= z_score)
        .encode(
            x=alt.X("z:Q", title="z-score"),
            y=alt.Y("density:Q", title="Density"),
        )
    )
    # Density curve.
    line = (
        alt.Chart(df_chart)
        .mark_line(color="orange")
        .encode(x="z:Q", y="density:Q")
    )
    # Vertical rule at the observed z-score.
    vline = (
        alt.Chart(pd.DataFrame({"z": [z_score]}))
        .mark_rule(color="orange")
        .encode(x="z:Q")
    )
    chart = (area + line + vline).properties(
        width=600,
        height=300,
        title="Standard Normal Distribution",
    )
    st.altair_chart(chart, use_container_width=True)

    st.write(
        f"Your value is z = {res['z_score']:.2f}, which places you in "
        f"the {res['percentile']:.1f}th percentile of a normal "
        f"distribution."
    )

    # Horizontal bullet gauge with an axis from -3 to 3 SD.
    bullet = go.Figure(
        go.Indicator(
            mode="number+gauge",
            value=z_score,
            number={"suffix": " SD"},
            gauge={
                "shape": "bullet",
                "axis": {
                    "range": [-3, 3],
                    "tickmode": "linear",
                    "dtick": 0.5,
                },
                "bar": {"color": "orange"},
            },
        )
    )
    bullet.update_layout(
        height=150,
        margin={"t": 20, "b": 20, "l": 20, "r": 20},
    )
    st.plotly_chart(bullet, use_container_width=True)

    st.write(f"Percentile: {res['percentile']:.1f}%")


def main():
    """Render the app: sidebar inputs, normative results and charts.

    Reads the module-level ``norm_df`` table to populate the sidebar selectors
    (columns ``area``, ``gender``, ``Age``, ``Bmi``, ``Biomarkers``, ``max``).
    When the compute button is pressed, delegates the calculation to
    ``normalizer_model.compute_normative_position`` and renders the result.
    Any exception from that call is shown via ``st.error`` and the function
    returns early (before the footer).
    """
    # Shown only while the flag is absent from session_state.
    if "disclaimer_shown" not in st.session_state:
        st.info(
            "These calculations are dedicated for scientifically purposes only. "
            "For detailed questions regarding personal health data contact your "
            "healthcare professionals."
        )
        st.session_state.disclaimer_shown = True

    st.title("Smartwatch Normative Z-Score Calculator")
    st.sidebar.header("Input Parameters")

    # Region, defaulting to Western Europe when the table contains it.
    regions = sorted(norm_df["area"].unique())
    default_region = "Western Europe" if "Western Europe" in regions else regions[0]
    region = st.sidebar.selectbox(
        "Region",
        regions,
        index=regions.index(default_region),
    )

    gender = st.sidebar.selectbox(
        "Gender",
        sorted(norm_df["gender"].unique()),
    )

    # Age: either a number of years or one of the table's age groups.
    st.sidebar.subheader("Age Input")
    age_input_mode = st.sidebar.radio(
        "Age input mode",
        ("Years", "Group"),
    )
    if age_input_mode == "Years":
        age_param = st.sidebar.number_input(
            "Age (years)",
            min_value=0,
            max_value=120,
            value=30,
            step=1,
        )
    else:
        age_groups = sorted(norm_df["Age"].unique(), key=_age_group_sort_key)
        # Leading "" is the "nothing selected yet" placeholder.
        age_param = st.sidebar.selectbox("Age group", [""] + age_groups)

    # BMI: either a numeric value or one of the table's categories.
    st.sidebar.subheader("BMI Input")
    bmi_input_mode = st.sidebar.radio(
        "BMI input mode",
        ("Value", "Category"),
    )
    if bmi_input_mode == "Value":
        bmi_param = st.sidebar.number_input(
            "BMI",
            min_value=0.0,
            max_value=100.0,
            value=24.0,
            step=0.1,
            format="%.1f",
        )
    else:
        bmi_cats = sorted(norm_df["Bmi"].unique())
        bmi_param = st.sidebar.selectbox("BMI category", [""] + bmi_cats)

    # Biomarker selector: friendly labels shown, codes used internally.
    codes = sorted(
        c for c in norm_df["Biomarkers"].unique() if c not in DISABLED_BIOMARKERS
    )
    friendly = [BIOMARKER_LABELS.get(c, c.title()) for c in codes]
    # Fall back to the first entry when the steps biomarker is absent or
    # disabled; list.index would raise ValueError otherwise.
    default_idx = (
        friendly.index("Number of Steps") if "Number of Steps" in friendly else 0
    )
    selected_label = st.sidebar.selectbox(
        "Biomarker",
        friendly,
        index=default_idx,
    )
    biomarker = codes[friendly.index(selected_label)]

    # Value input; all numeric arguments are floats so their types agree.
    default_value = 6500.0 if biomarker == "nb_steps" else 0.0
    mask = norm_df["Biomarkers"].str.lower() == biomarker.lower()
    max_val = float(norm_df.loc[mask, "max"].max())
    if np.isfinite(max_val) and max_val >= 0.0:
        # The default must not exceed the upper bound, or number_input raises.
        default_value = min(default_value, max_val)
    else:
        # No usable maximum in the table (NaN or negative): leave it unbounded.
        max_val = None
    value = st.sidebar.number_input(
        f"{selected_label} value",
        min_value=0.0,
        max_value=max_val,
        value=default_value,
        step=1.0,
    )

    if st.sidebar.button("Compute Normative Z-Score"):
        try:
            res = normalizer_model.compute_normative_position(
                value=value,
                biomarker=biomarker,
                age_group=age_param,
                region=region,
                gender=gender,
                bmi=bmi_param,
                normative_df=norm_df,
            )
        except Exception as e:
            # UI boundary: surface any model error to the user and stop.
            st.error(f"Error: {e}")
            return

        _render_summary(res, region, gender, age_param, bmi_param)
        _render_normality_checks(res)
        _render_skew_corrected(value, res)
        _render_visualizations(res)
    else:
        st.sidebar.info(
            "Fill in all inputs and click Compute to get normative Z-score."
        )

    # Footer
    st.markdown("---")
    st.markdown(
        "Built in with ❤️ in Düsseldorf. © Lars Masanneck 2025. "
        "Thanks to Withings for sharing this data openly."
    )
# Entry point: build the page only when this file runs as the main module.
if __name__ == "__main__":
    main()