Spaces:

MasanneckLab
/

Withings_Normalization_App

Sleeping

File size: 13,426 Bytes

import streamlit as st
import normalizer_model
import numpy as np
import pandas as pd
import altair as alt
import plotly.graph_objects as go
from scipy.stats import norm

# Page-level settings (browser tab title, wide layout) are applied up front,
# ahead of every other Streamlit command in this script.
st.set_page_config(layout="wide", page_title="Smartwatch Normative Z-Score Calculator")


@st.cache_data
def load_norm_df(path: str):
    """Return the normative table stored at ``path``.

    The actual parsing is delegated to
    ``normalizer_model.load_normative_table``; this wrapper only exists so
    the call can be wrapped with ``st.cache_data``.

    Args:
        path: Location of the normative summary table handed to the loader.

    Returns:
        Whatever ``normalizer_model.load_normative_table`` returns for
        ``path`` (used as a pandas DataFrame by the rest of this script).
    """
    table = normalizer_model.load_normative_table(path)
    return table


# Normative dataset shared by the whole app
norm_df = load_norm_df("Table_1_summary_measure.csv")

# Maps each biomarker code found in the normative table to the label shown in
# the UI. Codes without an entry fall back to a title-cased version of the
# code at the point of use. Extend this mapping when new biomarkers appear.
BIOMARKER_LABELS = dict(
    # Activity
    nb_steps="Number of Steps",
    max_steps="Maximum Steps",
    mean_active_time="Mean Active Time",
    # Blood pressure
    sbp="Systolic Blood Pressure",
    dbp="Diastolic Blood Pressure",
    # Sleep
    sleep_duration="Sleep Duration",
    avg_night_hr="Average Night Heart Rate",
    # Activity intensity
    nb_moderate_active_minutes="Moderate Active Minutes",
    nb_vigorous_active_minutes="Vigorous Active Minutes",
    # Body and vascular measures
    weight="Weight",
    pwv="Pulse Wave Velocity",
)

# Codes listed here are hidden from the biomarker selector for now; delete a
# code from the set to make it selectable again.
DISABLED_BIOMARKERS = {
    "weight",
}


def _format_stat(res, key):
    """Format the statistic ``res[key]`` with two decimals.

    A key that is missing, or present but ``None``, is rendered as NaN
    instead of raising, so the statistics table can always be built.
    """
    stat = res.get(key)
    if stat is None:
        stat = float("nan")
    return f"{stat:.2f}"


def _skew_interpretation(skew):
    """Return the flag text shown for a Pearson skewness value.

    Bands (on the absolute value): up to 0.1 symmetric, up to 0.5 slight,
    up to 1.0 noticeable, above that strong. The two mildest bands contain
    "OK", which is what keeps them from triggering the normality warning.
    """
    magnitude = abs(skew)
    if magnitude <= 0.1:
        return "Symmetric (OK)"
    direction = "Right" if skew > 0 else "Left"
    if magnitude <= 0.5:
        return f"{direction} slight skew (usually OK)"
    if magnitude <= 1.0:
        return f"{direction} noticeable skew"
    return f"{direction} strong skew"


def _age_input():
    """Render the sidebar age widgets and return the value for the model.

    Returns the age in years in "Years" mode, or the selected age-group
    label in "Group" mode (the empty string while nothing is chosen).
    """
    st.sidebar.subheader("Age Input")
    age_input_mode = st.sidebar.radio(
        "Age input mode",
        ("Years", "Group"),
    )
    if age_input_mode == "Years":
        return st.sidebar.number_input(
            "Age (years)",
            min_value=0,
            max_value=120,
            value=30,
            step=1,
        )
    # Order groups by the integer before the first "-" rather than
    # alphabetically (assumes labels look like "lo-hi" - TODO confirm).
    age_groups = sorted(
        norm_df["Age"].unique(),
        key=lambda x: int(x.split("-")[0]),
    )
    return st.sidebar.selectbox("Age group", [""] + age_groups)


def _bmi_input():
    """Render the sidebar BMI widgets and return the value for the model.

    Returns the numeric BMI in "Value" mode, or the selected BMI category
    in "Category" mode (the empty string while nothing is chosen).
    """
    st.sidebar.subheader("BMI Input")
    bmi_input_mode = st.sidebar.radio(
        "BMI input mode",
        ("Value", "Category"),
    )
    if bmi_input_mode == "Value":
        return st.sidebar.number_input(
            "BMI",
            min_value=0.0,
            max_value=100.0,
            value=24.0,
            step=0.1,
            format="%.1f",
        )
    bmi_cats = sorted(norm_df["Bmi"].unique())
    return st.sidebar.selectbox("BMI category", [""] + bmi_cats)


def _biomarker_input():
    """Render the biomarker selector and its value input in the sidebar.

    Returns:
        ``(biomarker_code, value)``, or ``None`` when the normative table
        offers no enabled biomarker at all.
    """
    codes = sorted(
        c for c in norm_df["Biomarkers"].unique() if c not in DISABLED_BIOMARKERS
    )
    if not codes:
        return None

    def label_for(code):
        return BIOMARKER_LABELS.get(code, code.title())

    # Select by code and let Streamlit render the friendly label. This avoids
    # the label -> code reverse lookup and the ValueError the old
    # ``friendly.index("Number of Steps")`` raised when steps were absent.
    default_idx = codes.index("nb_steps") if "nb_steps" in codes else 0
    biomarker = st.sidebar.selectbox(
        "Biomarker",
        codes,
        index=default_idx,
        format_func=label_for,
    )
    selected_label = label_for(biomarker)

    # All numeric arguments are floats so Streamlit sees consistent types.
    default_value = 6500.0 if biomarker == "nb_steps" else 0.0
    # Upper bound comes from the largest "max" recorded for this biomarker.
    mask = norm_df["Biomarkers"].str.lower() == biomarker.lower()
    max_val = float(norm_df.loc[mask, "max"].max())
    if np.isfinite(max_val) and max_val >= 0.0:
        upper = max_val
        # number_input rejects a default above its maximum.
        default_value = min(default_value, upper)
    else:
        # No usable bound (no matching rows, NaN, or below the minimum).
        upper = None
    value = st.sidebar.number_input(
        f"{selected_label} value",
        min_value=0.0,
        max_value=upper,
        value=default_value,
        step=1.0,
    )
    return biomarker, value


def _render_summary(res, region, gender, age_param, bmi_param):
    """Show headline metrics, the cohort description and the stats table."""
    st.subheader("Results")
    m1, m2, m3, m4, m5 = st.columns(5)
    m1.metric("Z-Score", f"{res['z_score']:.2f}")
    m2.metric("Percentile", f"{res['percentile']:.2f}")
    m3.metric("Mean", f"{res['mean']:.2f}")
    m4.metric("SD", f"{res['sd']:.2f}")
    m5.metric("Sample Size", res["n"])

    # Resolve the age group and BMI category for the cohort summary.
    age_group_str = normalizer_model._categorize_age(age_param, norm_df)
    bmi_cat = normalizer_model.categorize_bmi(bmi_param)
    # Each bold span is closed right after its value; previously the span
    # opened before the BMI category ran on across the sample-size sentence.
    st.markdown(
        f"**Basis of calculation:** Data from region **{region}**, "
        f"gender **{gender}**, age group **{age_group_str}**, "
        f"and BMI category **{bmi_cat}**. "
        f"Sample size: **{res['n']}**."
    )

    st.subheader("Detailed Statistics")
    optional_keys = ("median", "q1", "q3", "iqr", "mad", "se", "ci")
    stats_df = pd.DataFrame(
        {
            "Statistic": [
                "Z-Score",
                "Percentile",
                "Mean",
                "SD",
                "Sample Size",
                "Median",
                "Q1",
                "Q3",
                "IQR",
                "MAD",
                "SE",
                "CI",
            ],
            "Value": [
                f"{res['z_score']:.2f}",
                f"{res['percentile']:.2f}",
                f"{res['mean']:.2f}",
                f"{res['sd']:.2f}",
                res.get("n", "N/A"),
                *(_format_stat(res, key) for key in optional_keys),
            ],
        }
    )
    st.table(stats_df)

    note = (
        "*Note: Percentile and z-score estimation assume a normal "
        "distribution based on global Withings user data stratified by "
        "the parameters entered.*"
    )
    st.write(note)


def _render_normality_checks(res):
    """Show the normality heuristics table, its guide and any warning.

    Expects ``res`` to provide ``mean``, ``sd``, ``median``, ``q1``, ``q3``
    and ``iqr``.
    """
    # Imported here so the module is only required once results are shown.
    import normality_checks as nc

    # Presumably the IQR/SD ratio - that is how the table labels it.
    iqr_sd_ratio = nc.iqr_tail_heaviness(res["iqr"], res["sd"])
    q1_z, q3_z = nc.quartile_z_scores(
        res["mean"],
        res["sd"],
        res["q1"],
        res["q3"],
    )
    skew = nc.pearson_skewness(res["mean"], res["median"], res["sd"])
    st.subheader("Normality Heuristics")

    # A normal distribution has IQR/SD of about 1.349 and quartiles at
    # z = -0.6745 and +0.6745; the flags measure distance from those values.
    if iqr_sd_ratio > 1.5:
        tail_flag = "Heavier tails"
    elif iqr_sd_ratio < 1.2:
        tail_flag = "Lighter tails"
    else:
        tail_flag = "OK"

    norm_checks = pd.DataFrame(
        {
            "Check": [
                "IQR/SD",
                "Q1 z-score",
                "Q3 z-score",
                "Pearson Skewness",
            ],
            "Value": [
                f"{iqr_sd_ratio:.2f}",
                f"{q1_z:.2f}",
                f"{q3_z:.2f}",
                f"{skew:.2f}",
            ],
            "Flag": [
                tail_flag,
                "Deviation" if abs(q1_z + 0.6745) > 0.1 else "OK",
                "Deviation" if abs(q3_z - 0.6745) > 0.1 else "OK",
                _skew_interpretation(skew),
            ],
        }
    )
    st.table(norm_checks)

    st.markdown(
        """
        **Pearson Skewness Interpretation:**
        - ≈ 0: Symmetric distribution
        - ±0.1 to ±0.5: Slight/moderate skew
        - ±0.5 to ±1: Noticeable skew
        - larger than±1: Strong skew

        - Positive values: Right skew (longer tail on right)
        - Negative values: Left skew (longer tail on left)
        """
    )

    # Any flag that does not contain "OK" counts as a deviation.
    if any(("OK" not in flag) for flag in norm_checks["Flag"]):
        st.warning(
            "Warning: Heuristic checks indicate possible deviations "
            "from normality; interpret z-score and percentiles with "
            "caution."
        )


def _render_skew_corrected(value, res):
    """Show the optional skew-corrected z-score and percentile expander."""
    with st.expander("Optional: Skew-Corrected Results"):
        st.write("Adjusts for skew via Pearson Type III back-transform.")
        st.write("Error often <1 percentile point when |skew| ≤ 0.5.")
        st.write("Usually more useful for stronger skewed distributions.")
        st.write("Note: This is a heuristic and may not always be accurate.")
        res_skew = normalizer_model.compute_skew_corrected_position(
            value=value,
            mean=res["mean"],
            sd=res["sd"],
            median=res["median"],
        )
        pct_skew = f"{res_skew['percentile_skew_corrected']:.2f}"
        sc1, sc2 = st.columns(2)
        sc1.metric(
            "Skew-Corrected Z-Score",
            f"{res_skew['z_skew_corrected']:.2f}",
        )
        sc2.metric(
            "Skew-Corrected Percentile",
            pct_skew,
        )


def _render_visualizations(res):
    """Draw the shaded normal curve, the bullet gauge and their captions."""
    st.markdown("---")
    st.subheader("Visualizations")
    # Standard normal density sampled on [-4, 4].
    z_vals = np.linspace(-4, 4, 400)
    density = norm.pdf(z_vals)
    df_chart = pd.DataFrame({"z": z_vals, "density": density})
    # Shade the area up to the observed z-score.
    area = (
        alt.Chart(df_chart)
        .mark_area(color="orange", opacity=0.3)
        .transform_filter(alt.datum.z <= res["z_score"])
        .encode(
            x=alt.X(
                "z:Q",
                title="z-score",
            ),
            y=alt.Y(
                "density:Q",
                title="Density",
            ),
        )
    )
    # Density curve.
    line = (
        alt.Chart(df_chart)
        .mark_line(color="orange")
        .encode(
            x="z:Q",
            y="density:Q",
        )
    )
    # Vertical rule at the observed z-score.
    vline = (
        alt.Chart(pd.DataFrame({"z": [res["z_score"]]}))
        .mark_rule(color="orange")
        .encode(x="z:Q")
    )
    chart = (area + line + vline).properties(
        width=600,
        height=300,
        title="Standard Normal Distribution",
    )
    st.altair_chart(chart, use_container_width=True)
    st.write(
        f"Your value is z = {res['z_score']:.2f}, which places you in "
        f"the {res['percentile']:.1f}th percentile of a normal "
        f"distribution."
    )
    # Horizontal bullet gauge locating the z-score on a -3..3 SD axis.
    bullet = go.Figure(
        go.Indicator(
            mode="number+gauge",
            value=res["z_score"],
            number={"suffix": " SD"},
            gauge={
                "shape": "bullet",
                "axis": {
                    "range": [-3, 3],
                    "tickmode": "linear",
                    "dtick": 0.5,
                },
                "bar": {"color": "orange"},
            },
        )
    )
    bullet.update_layout(
        height=150,
        margin={"t": 20, "b": 20, "l": 20, "r": 20},
    )
    st.plotly_chart(bullet, use_container_width=True)
    st.write(f"Percentile: {res['percentile']:.1f}%")


def main():
    """Run the Streamlit page: collect inputs, compute and display results.

    The sidebar gathers region, gender, age, BMI, biomarker and the measured
    value. When the compute button is pressed, the inputs are passed to
    ``normalizer_model.compute_normative_position`` and the result is shown
    as metrics, a statistics table, normality heuristics, an optional
    skew-corrected estimate and two charts. Any exception raised by the
    computation is reported with ``st.error`` and ends the run early.
    """
    # The disclaimer is rendered only on the first run of a session; the
    # flag in session state suppresses it on later reruns.
    if "disclaimer_shown" not in st.session_state:
        st.info(
            "These calculations are dedicated for scientifically purposes only. "
            "For detailed questions regarding personal health data contact your "
            "healthcare professionals."
        )
        st.session_state.disclaimer_shown = True
    st.title("Smartwatch Normative Z-Score Calculator")
    st.sidebar.header("Input Parameters")

    # Region, preselecting Western Europe when the table contains it.
    regions = sorted(norm_df["area"].unique())
    region_idx = regions.index("Western Europe") if "Western Europe" in regions else 0
    region = st.sidebar.selectbox(
        "Region",
        regions,
        index=region_idx,
    )

    gender = st.sidebar.selectbox(
        "Gender",
        sorted(norm_df["gender"].unique()),
    )

    age_param = _age_input()
    bmi_param = _bmi_input()

    selection = _biomarker_input()
    if selection is None:
        st.error("Error: no biomarkers are available in the normative table.")
        return
    biomarker, value = selection

    if st.sidebar.button("Compute Normative Z-Score"):
        try:
            res = normalizer_model.compute_normative_position(
                value=value,
                biomarker=biomarker,
                age_group=age_param,
                region=region,
                gender=gender,
                bmi=bmi_param,
                normative_df=norm_df,
            )
        except Exception as e:
            # UI boundary: surface any model failure to the user and stop.
            st.error(f"Error: {e}")
            return

        _render_summary(res, region, gender, age_param, bmi_param)

        # The statistics table treats these keys as optional, so the
        # sections that need them are skipped rather than raising KeyError.
        missing = [k for k in ("iqr", "q1", "q3", "median") if res.get(k) is None]
        if missing:
            st.info(
                "Normality heuristics and skew-corrected results are "
                f"unavailable (missing statistics: {', '.join(missing)})."
            )
        else:
            _render_normality_checks(res)
            _render_skew_corrected(value, res)

        _render_visualizations(res)
    else:
        st.sidebar.info(
            "Fill in all inputs and click Compute to get normative Z-score."
        )

    # Footer
    st.markdown("---")
    st.markdown(
        "Built in with ❤️ in Düsseldorf. © Lars Masanneck 2025. "
        "Thanks to Withings for sharing this data openly."
    )


# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()