|
from typing import List, Tuple |
|
|
|
import pandas as pd |
|
import plotly.express as px |
|
import streamlit as st |
|
|
|
from queries.process_gsm import combined_gsm_database |
|
from utils.convert_to_excel import convert_gsm_dfs, save_dataframe |
|
from utils.kpi_analysis_utils import create_hourly_date, kpi_naming_cleaning |
|
|
|
|
|
# Identity/location columns kept from the BTS sheet of the GSM dump.
GSM_COLUMNS = [
    "ID_BTS", "BSC", "code", "Region",
    "locationAreaIdLAC", "Longitude", "Latitude",
]

# Per-cell channel-count columns kept from the TRX sheet of the GSM dump.
TRX_COLUMNS = [
    "ID_BTS",
    "number_trx_per_cell", "number_tch_per_cell", "number_sd_per_cell",
    "number_bcch_per_cell", "number_ccch_per_cell", "number_cbc_per_cell",
    "number_total_channels_per_cell", "number_signals_per_cell",
]

# Columns retained from the hourly KPI report after name cleaning and
# datetime derivation.
KPI_COLUMNS = [
    "BSC_name", "BCF_name", "BTS_name",
    "Paging_messages_on_air_interface", "DELETE_PAGING_COMMAND_c003038",
    "datetime", "date", "hour",
]
|
|
|
|
|
def get_gsm_databases(dump_path: str) -> pd.DataFrame:
    """
    Build a combined BTS/TRX DataFrame from a GSM database dump.

    Args:
        dump_path: Path (or uploaded file object) of the GSM dump file.

    Returns:
        pd.DataFrame: One row per BTS with TRX channel counts merged in,
        plus derived ``BSC_Lac`` and ``number_trx_per_lac`` columns.
    """
    databases = combined_gsm_database(dump_path)
    # Index 0 holds the BTS sheet, index 2 the TRX sheet
    # (per combined_gsm_database's return order).
    bts: pd.DataFrame = databases[0][GSM_COLUMNS]
    trx: pd.DataFrame = databases[2][TRX_COLUMNS].drop_duplicates(subset=["ID_BTS"])

    # Attach the per-cell channel counts to each BTS row.
    merged = bts.merge(trx, on="ID_BTS", how="left")

    # Composite key identifying a LAC within its BSC.
    merged["BSC_Lac"] = (
        merged["BSC"].astype(str) + "_" + merged["locationAreaIdLAC"].astype(str)
    )

    # Total TRX count aggregated over every cell sharing the same BSC/LAC.
    merged["number_trx_per_lac"] = merged.groupby("BSC_Lac")[
        "number_trx_per_cell"
    ].transform("sum")

    return merged
|
|
|
|
|
def analyze_lac_load_kpi(hourly_report_path: str) -> pd.DataFrame:
    """
    Load and prepare the hourly KPI report for LAC load analysis.

    Args:
        hourly_report_path: Path (or uploaded file object) of the hourly
            KPI report CSV file.

    Returns:
        pd.DataFrame: Cleaned KPI rows restricted to ``KPI_COLUMNS`` with an
        integer ``code`` column extracted from ``BTS_name``.
    """
    report = pd.read_csv(hourly_report_path, delimiter=";")
    report = create_hourly_date(kpi_naming_cleaning(report))[KPI_COLUMNS]

    # Drop rows whose BTS name is too short to carry a "<code>_<name>" prefix.
    report = report[report["BTS_name"].str.len() >= 5]

    # The site code is the token before the first underscore; values that do
    # not parse as numbers become 0 so the column stays integer-typed.
    raw_codes = report["BTS_name"].str.split("_").str[0]
    report["code"] = pd.to_numeric(raw_codes, errors="coerce").fillna(0).astype(int)

    return report
|
|
|
|
|
def _max_rows_per_lac(
    df: pd.DataFrame, value_col: str, keep_cols: List[str]
) -> pd.DataFrame:
    """Return, for each BSC_Lac, the row with the largest value_col (keep_cols only)."""
    # Sorting descending then keeping the first row per BSC_Lac selects the
    # row holding the per-LAC maximum of value_col.
    return df.sort_values(by=["BSC_Lac", value_col], ascending=False).drop_duplicates(
        subset=["BSC_Lac"], keep="first"
    )[keep_cols]


def analyze_lac_load(
    dump_path: str,
    hourly_report_path: str,
    max_paging_capacity: int = 640800,
) -> List[pd.DataFrame]:
    """
    Analyze LAC load from a GSM dump and an hourly KPI report.

    Args:
        dump_path: Path to the GSM dump file.
        hourly_report_path: Path to the hourly KPI report CSV file.
        max_paging_capacity: Theoretical hourly paging-message capacity of a
            LAC, used as the denominator of the ``Utilization`` percentage.
            Defaults to 640800 (the previously hard-coded value), so existing
            callers are unaffected.

    Returns:
        List containing two DataFrames: [lac_load_df, max_paging_df]
    """
    gsm_df = get_gsm_databases(dump_path)
    lac_load_df = analyze_lac_load_kpi(hourly_report_path)
    lac_load_df = pd.merge(gsm_df, lac_load_df, on="code", how="left")

    # Hourly maxima per BSC/LAC across all member cells.
    lac_load_df = (
        lac_load_df.groupby(
            [
                "datetime",
                "date",
                "hour",
                "BSC_name",
                "BSC_Lac",
                "number_trx_per_lac",
            ]
        )
        .agg(
            {
                "Paging_messages_on_air_interface": "max",
                "DELETE_PAGING_COMMAND_c003038": "max",
            }
        )
        .reset_index()
    )

    # Busiest observation per LAC for each paging metric.
    max_paging_messages = _max_rows_per_lac(
        lac_load_df,
        "Paging_messages_on_air_interface",
        [
            "BSC_name",
            "BSC_Lac",
            "number_trx_per_lac",
            "Paging_messages_on_air_interface",
        ],
    )
    max_delete_paging = _max_rows_per_lac(
        lac_load_df,
        "DELETE_PAGING_COMMAND_c003038",
        ["BSC_name", "BSC_Lac", "DELETE_PAGING_COMMAND_c003038"],
    )

    max_paging_df = pd.merge(
        max_paging_messages,
        max_delete_paging,
        on=["BSC_name", "BSC_Lac"],
        how="left",
    )

    # Peak paging load expressed as a percentage of theoretical capacity.
    max_paging_df["Utilization"] = (
        (max_paging_df["Paging_messages_on_air_interface"] / max_paging_capacity) * 100
    ).round(2)

    return [lac_load_df, max_paging_df]
|
|
|
|
|
def display_ui() -> None:
    """Display the Streamlit user interface header (title, usage notes, image)."""
    st.title(" 📊 GSM LAC Load Analysis")
    # Two-column header: usage notes on the left, illustration on the right.
    doc_col, image_col = st.columns(2)

    with doc_col:
        st.write(
            """
        The report should be run with a minimum of 7 days of data.
        - Dump file required
        - Hourly Report in CSV format
        """
        )

    with image_col:
        # NOTE(review): relative asset path — assumes the app is launched from
        # the project root; confirm against the deployment setup.
        st.image("./assets/gsm_lac_load.png", width=250)
|
|
|
|
|
@st.fragment
def display_filtered_lac_load(lac_load_df: pd.DataFrame) -> None:
    """
    Render interactive charts and a table for a user-filtered slice of LAC load.

    Args:
        lac_load_df: DataFrame containing LAC load data.
    """
    st.write("### Filtered LAC Load by BSC and BSC_Lac")

    bsc_col, bsc_lac_col = st.columns(2)

    all_bscs = lac_load_df["BSC_name"].unique()
    with bsc_col:
        # Pre-select the first BSC so the charts render something immediately.
        selected_bsc = st.multiselect(
            "Select BSC",
            all_bscs,
            key="selected_bsc",
            default=[all_bscs[0]],
        )

    # Only offer LACs belonging to the chosen BSC(s); all are selected by default.
    lac_options = lac_load_df[lac_load_df["BSC_name"].isin(selected_bsc)][
        "BSC_Lac"
    ].unique()
    with bsc_lac_col:
        selected_bsc_lac = st.multiselect(
            "Select BSC_Lac",
            lac_options,
            key="selected_bsc_lac",
            default=lac_options,
        )

    selection_mask = lac_load_df["BSC_name"].isin(selected_bsc) & lac_load_df[
        "BSC_Lac"
    ].isin(selected_bsc_lac)
    filtered = lac_load_df[selection_mask]

    paging_chart_col, delete_chart_col = st.columns(2)

    with paging_chart_col:
        st.write("### Paging Messages on Air Interface")
        paging_fig = px.line(
            filtered,
            x="datetime",
            y="Paging_messages_on_air_interface",
            color="BSC_Lac",
            title="Max Paging Messages on Air Interface Per BSC_Lac",
        )
        paging_fig.update_layout(
            xaxis_title="<b>Datetime</b>",
            yaxis_title="<b>Paging Messages on Air Interface</b>",
        )
        # Horizontal reference line marking the paging-load alert level.
        paging_fig.add_hline(y=256000, line_color="red", line_dash="dash", line_width=2)
        st.plotly_chart(paging_fig)

    with delete_chart_col:
        st.write("### Delete Paging Commands")
        delete_fig = px.line(
            filtered,
            x="datetime",
            y="DELETE_PAGING_COMMAND_c003038",
            color="BSC_Lac",
            title="Max Delete Paging Commands Per BSC_Lac",
        )
        delete_fig.update_layout(
            xaxis_title="<b>Datetime</b>",
            yaxis_title="<b>Delete Paging Commands</b>",
        )
        st.plotly_chart(delete_fig)

    st.write("### Filtered LAC Load Data")
    st.dataframe(filtered)
|
|
|
|
|
def main() -> None:
    """
    Run the Streamlit app: collect the two input files, analyze, display.

    Requires both an xlsb dump and a CSV hourly report before the Analyze
    button appears.
    """
    display_ui()

    file1, file2 = st.columns(2)
    with file1:
        uploaded_dump = st.file_uploader("Upload Dump file in xlsb format", type="xlsb")
    with file2:
        uploaded_hourly_report = st.file_uploader(
            "Upload Hourly Report in CSV format", type="csv"
        )

    if uploaded_dump is not None and uploaded_hourly_report is not None:
        if st.button("Analyze Data", type="primary"):
            with st.spinner("Analyzing data..."):
                dfs = analyze_lac_load(
                    dump_path=uploaded_dump,
                    hourly_report_path=uploaded_hourly_report,
                )

            lac_load_df = dfs[0]
            max_paging_df = dfs[1]

            if lac_load_df is not None:
                # BUG FIX: the display was previously gated on
                # '"lac_load_df" not in st.session_state', so once a first
                # analysis had cached the key, every later Analyze click
                # recomputed but rendered nothing. Always refresh the cached
                # result and always show the output.
                st.session_state.lac_load_df = lac_load_df
                st.write("### LAC Load and Utilization with Max Paging 640800")
                st.dataframe(max_paging_df)
                display_filtered_lac_load(lac_load_df)
|
|
|
|
|
if __name__ == "__main__": |
|
main() |
|
|