Spaces:

DavMelchi
/

db_query

Running

File size: 8,446 Bytes

0898fcc

from typing import List, Tuple

import pandas as pd
import plotly.express as px
import streamlit as st

from queries.process_gsm import combined_gsm_database
from utils.convert_to_excel import convert_gsm_dfs, save_dataframe
from utils.kpi_analysis_utils import create_hourly_date, kpi_naming_cleaning

# Constants
GSM_COLUMNS = [
    "ID_BTS",
    "BSC",
    "code",
    "Region",
    "locationAreaIdLAC",
    "Longitude",
    "Latitude",
]

TRX_COLUMNS = [
    "ID_BTS",
    "number_trx_per_cell",
    "number_tch_per_cell",
    "number_sd_per_cell",
    "number_bcch_per_cell",
    "number_ccch_per_cell",
    "number_cbc_per_cell",
    "number_total_channels_per_cell",
    "number_signals_per_cell",
]

KPI_COLUMNS = [
    "BSC_name",
    "BCF_name",
    "BTS_name",
    "Paging_messages_on_air_interface",
    "DELETE_PAGING_COMMAND_c003038",
    "datetime",
    "date",
    "hour",
]


def get_gsm_databases(dump_path: str) -> pd.DataFrame:
    """
    Process GSM database dump and return combined DataFrame with BTS and TRX data.

    Args:
        dump_path: Path to the GSM dump file

    Returns:
        pd.DataFrame: Combined DataFrame with BTS and TRX information
    """
    dfs = combined_gsm_database(dump_path)
    bts_df: pd.DataFrame = dfs[0]
    trx_df: pd.DataFrame = dfs[2]

    # Clean GSM df
    bts_df = bts_df[GSM_COLUMNS]
    trx_df = trx_df[TRX_COLUMNS]
    trx_df = trx_df.drop_duplicates(subset=["ID_BTS"])

    gsm_df = pd.merge(bts_df, trx_df, on="ID_BTS", how="left")

    # Create BSC_Lac column
    gsm_df["BSC_Lac"] = (
        gsm_df["BSC"].astype(str) + "_" + gsm_df["locationAreaIdLAC"].astype(str)
    )

    # Calculate number of TRX per LAC
    gsm_df["number_trx_per_lac"] = gsm_df.groupby("BSC_Lac")[
        "number_trx_per_cell"
    ].transform("sum")

    return gsm_df


def analyze_lac_load_kpi(hourly_report_path: str) -> pd.DataFrame:
    """
    Process hourly KPI report and prepare it for LAC load analysis.

    Args:
        hourly_report_path: Path to the hourly KPI report CSV file

    Returns:
        pd.DataFrame: Processed DataFrame with KPI data
    """
    df = pd.read_csv(hourly_report_path, delimiter=";")
    df = kpi_naming_cleaning(df)
    df = create_hourly_date(df)
    df = df[KPI_COLUMNS]

    # Clean and process BTS codes
    df = df[df["BTS_name"].str.len() >= 5]
    df["code"] = df["BTS_name"].str.split("_").str[0]
    df["code"] = pd.to_numeric(df["code"], errors="coerce").fillna(0).astype(int)

    return df


def analyze_lac_load(dump_path: str, hourly_report_path: str) -> List[pd.DataFrame]:
    """
    Analyze LAC load from GSM dump and hourly KPI report.

    Args:
        dump_path: Path to the GSM dump file
        hourly_report_path: Path to the hourly KPI report CSV file

    Returns:
        List containing two DataFrames: [lac_load_df, max_paging_df]
    """
    gsm_df = get_gsm_databases(dump_path)
    lac_load_df = analyze_lac_load_kpi(hourly_report_path)
    lac_load_df = pd.merge(gsm_df, lac_load_df, on="code", how="left")

    # Aggregate data
    lac_load_df = (
        lac_load_df.groupby(
            [
                "datetime",
                "date",
                "hour",
                "BSC_name",
                "BSC_Lac",
                "number_trx_per_lac",
            ]
        )
        .agg(
            {
                "Paging_messages_on_air_interface": "max",
                "DELETE_PAGING_COMMAND_c003038": "max",
            }
        )
        .reset_index()
    )

    # Get max paging messages
    max_paging_messages = lac_load_df.sort_values(
        by=["BSC_Lac", "Paging_messages_on_air_interface"], ascending=False
    ).drop_duplicates(subset=["BSC_Lac"], keep="first")[
        [
            "BSC_name",
            "BSC_Lac",
            "number_trx_per_lac",
            "Paging_messages_on_air_interface",
        ]
    ]

    # Get max delete paging commands
    max_delete_paging = lac_load_df.sort_values(
        by=["BSC_Lac", "DELETE_PAGING_COMMAND_c003038"], ascending=False
    ).drop_duplicates(subset=["BSC_Lac"], keep="first")[
        ["BSC_name", "BSC_Lac", "DELETE_PAGING_COMMAND_c003038"]
    ]

    # Merge results
    max_paging_df = pd.merge(
        max_paging_messages,
        max_delete_paging,
        on=["BSC_name", "BSC_Lac"],
        how="left",
    )

    # Calculate utilization (paging/640800)
    max_paging_df["Utilization"] = (
        (max_paging_df["Paging_messages_on_air_interface"] / 640800) * 100
    ).round(2)

    return [lac_load_df, max_paging_df]


def display_ui() -> None:
    """Display the Streamlit user interface."""
    st.title(" 📊 GSM LAC Load Analysis")
    doc_col, image_col = st.columns(2)

    with doc_col:
        st.write(
            """
            The report should be run with a minimum of 7 days of data.
            - Dump file required
            - Hourly Report in CSV format
            """
        )

    with image_col:
        st.image("./assets/gsm_lac_load.png", width=250)


@st.fragment
def display_filtered_lac_load(lac_load_df: pd.DataFrame) -> None:
    """
    Display filtered LAC load data with interactive charts.

    Args:
        lac_load_df: DataFrame containing LAC load data
    """
    st.write("### Filtered LAC Load by BSC and BSC_Lac")

    bsc_col, bsc_lac_col = st.columns(2)

    with bsc_col:
        selected_bsc = st.multiselect(
            "Select BSC",
            lac_load_df["BSC_name"].unique(),
            key="selected_bsc",
            default=[lac_load_df["BSC_name"].unique()[0]],
        )

    with bsc_lac_col:
        selected_bsc_lac = st.multiselect(
            "Select BSC_Lac",
            lac_load_df[lac_load_df["BSC_name"].isin(selected_bsc)]["BSC_Lac"].unique(),
            key="selected_bsc_lac",
            default=lac_load_df[lac_load_df["BSC_name"].isin(selected_bsc)][
                "BSC_Lac"
            ].unique(),
        )

    filtered_lac_load_df = lac_load_df[
        lac_load_df["BSC_name"].isin(selected_bsc)
        & lac_load_df["BSC_Lac"].isin(selected_bsc_lac)
    ]

    # Display charts
    chart1, chart2 = st.columns(2)
    with chart1:
        st.write("### Paging Messages on Air Interface")
        fig1 = px.line(
            filtered_lac_load_df,
            x="datetime",
            y="Paging_messages_on_air_interface",
            color="BSC_Lac",
            title="Max Paging Messages on Air Interface Per BSC_Lac",
        )
        fig1.update_layout(
            xaxis_title="<b>Datetime</b>",
            yaxis_title="<b>Paging Messages on Air Interface</b>",
        )
        fig1.add_hline(y=256000, line_color="red", line_dash="dash", line_width=2)
        st.plotly_chart(fig1)

    with chart2:
        st.write("### Delete Paging Commands")
        fig2 = px.line(
            filtered_lac_load_df,
            x="datetime",
            y="DELETE_PAGING_COMMAND_c003038",
            color="BSC_Lac",
            title="Max Delete Paging Commands Per BSC_Lac",
        )
        fig2.update_layout(
            xaxis_title="<b>Datetime</b>",
            yaxis_title="<b>Delete Paging Commands</b>",
        )
        st.plotly_chart(fig2)

    st.write("### Filtered LAC Load Data")
    st.dataframe(filtered_lac_load_df)


def main() -> None:
    """Main function to run the Streamlit app."""
    display_ui()

    # File uploaders
    file1, file2 = st.columns(2)
    with file1:
        uploaded_dump = st.file_uploader("Upload Dump file in xlsb format", type="xlsb")
    with file2:
        uploaded_hourly_report = st.file_uploader(
            "Upload Hourly Report in CSV format", type="csv"
        )

    if uploaded_dump is not None and uploaded_hourly_report is not None:
        if st.button("Analyze Data", type="primary"):
            with st.spinner("Analyzing data..."):
                dfs = analyze_lac_load(
                    dump_path=uploaded_dump,
                    hourly_report_path=uploaded_hourly_report,
                )

                lac_load_df = dfs[0]
                max_paging_df = dfs[1]

                if lac_load_df is not None and "lac_load_df" not in st.session_state:
                    st.session_state.lac_load_df = lac_load_df
                    st.write("### LAC Load and Utilization with Max Paging 640800")
                    st.dataframe(max_paging_df)
                    display_filtered_lac_load(lac_load_df)


if __name__ == "__main__":
    main()