db_query / process_kpi /process_wcel_capacity.py
DavMelchi's picture
Add City to DataBase
002ceab
import pandas as pd
from utils.kpi_analysis_utils import (
analyze_fails_kpi,
cell_availability_analysis,
combine_comments,
create_daily_date,
create_dfs_per_kpi,
kpi_naming_cleaning,
summarize_fails_comments,
)
from utils.utils_vars import get_physical_db
tx_comments_mapping = {
"iub_frameloss exceeded threshold": "iub frameloss",
"iub_frameloss exceeded threshold, hsdpa_congestion_rate_iub exceeded threshold": "iub frameloss and hsdpa iub congestion",
"hsdpa_congestion_rate_iub exceeded threshold": "hsdpa iub congestion",
}
operational_comments_mapping = {
"Down Site": "Down Cell",
"iub frameloss, instability": "Availability and TX issues",
"iub frameloss and hsdpa iub congestion, Availability OK": "TX issues",
"iub frameloss, Availability OK": "TX issues",
"critical instability": "Availability issues",
"iub frameloss, critical instability": "Availability and TX issues",
"iub frameloss and hsdpa iub congestion, instability": "Availability and TX issues",
"Availability OK": "Site OK",
"hsdpa iub congestion, instability": "Availability and TX issues",
"instability": "Availability issues",
"hsdpa iub congestion, Availability OK": "TX issues",
"iub frameloss and hsdpa iub congestion, critical instability": "Availability and TX issues",
"hsdpa iub congestion, critical instability": "Availability and TX issues",
}
fails_comments_mapping = {
"ac, ac_dl, bts, code fails": "Power, Bts and Code fails",
"bts fails": "Bts fails",
"ac, bts, code fails": "Power and Code fails",
"ac, code fails": "Power fails",
"ac fails": "Power fails",
"ac, ac_dl fails": "Power fails",
"ac, bts fails": "Power and Bts fails",
"ac, ac_dl, bts fails": "Power and Bts fails",
"ac, ac_dl, code fails": "Power and Code fails",
"ac, ac_ul, bts, code fails": "Power, Bts and Code fails",
"ac, ac_dl, ac_ul, bts, code fails": "Power, Bts and Code fails",
}
KPI_COLUMNS = [
"WCEL_name",
"date",
"Cell_Availability_excluding_blocked_by_user_state_BLU",
"Total_CS_traffic_Erl",
"HSDPA_TRAFFIC_VOLUME",
"HSDPA_USER_THROUGHPUT",
"Max_simult_HSDPA_users",
"IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71",
"HSDPA_congestion_rate_in_Iub",
"rrc_conn_stp_fail_ac_M1001C3",
"RRC_CONN_STP_FAIL_AC_UL_M1001C731",
"RRC_CONN_STP_FAIL_AC_DL_M1001C732",
"RRC_CONN_STP_FAIL_AC_COD_M1001C733",
"rrc_conn_stp_fail_bts_M1001C4",
]
WCEL_ANALYSIS_COLUMNS = [
"WCEL_name",
"Average_cell_availability_daily",
"number_of_days_exceeding_availability_threshold_daily",
"availability_comment_daily",
"sum_traffic_cs",
"sum_traffic_dl",
"max_dl_throughput",
"avg_dl_throughput",
"max_users",
"max_iub_frameloss",
"number_of_days_with_iub_frameloss_exceeded",
"max_hsdpa_congestion_rate_iub",
"number_of_days_with_hsdpa_congestion_rate_iub_exceeded",
"max_rrc_fail_ac",
"number_of_days_with_rrc_fail_ac_exceeded",
"max_rrc_fail_ac_ul",
"number_of_days_with_rrc_fail_ac_ul_exceeded",
"max_rrc_fail_ac_dl",
"number_of_days_with_rrc_fail_ac_dl_exceeded",
"max_rrc_fail_code",
"number_of_days_with_rrc_fail_code_exceeded",
"max_rrc_fail_bts",
"number_of_days_with_rrc_fail_bts_exceeded",
"tx_congestion_comments",
"operational_comments",
"fails_comments",
"final_comments",
]
class WcelCapacity:
final_results: pd.DataFrame = None
def wcel_kpi_analysis(
df: pd.DataFrame,
num_last_days: int,
num_threshold_days: int,
availability_threshold: int,
iub_frameloss_threshold: int,
hsdpa_congestion_rate_iub_threshold: int,
fails_treshold: int,
) -> pd.DataFrame:
pivoted_kpi_dfs = create_dfs_per_kpi(
df=df,
pivot_date_column="date",
pivot_name_column="WCEL_name",
kpi_columns_from=2,
)
cell_availability_df = cell_availability_analysis(
df=pivoted_kpi_dfs["Cell_Availability_excluding_blocked_by_user_state_BLU"],
days=num_last_days,
availability_threshold=availability_threshold,
)
# Trafics, throughput and max users
trafic_cs_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
hsdpa_traffic_df = pivoted_kpi_dfs["HSDPA_TRAFFIC_VOLUME"]
hsdpa_user_throughput_df = pivoted_kpi_dfs["HSDPA_USER_THROUGHPUT"]
max_simult_hsdpa_users_df = pivoted_kpi_dfs["Max_simult_HSDPA_users"]
# Add Max of Trafics, throughput and max users
trafic_cs_df["sum_traffic_cs"] = trafic_cs_df.sum(axis=1)
hsdpa_traffic_df["sum_traffic_dl"] = hsdpa_traffic_df.sum(axis=1)
hsdpa_user_throughput_df["max_dl_throughput"] = hsdpa_user_throughput_df.max(axis=1)
max_simult_hsdpa_users_df["max_users"] = max_simult_hsdpa_users_df.max(axis=1)
# add average of Trafics, throughput and max users
hsdpa_user_throughput_df["avg_dl_throughput"] = hsdpa_user_throughput_df.mean(
axis=1
)
max_simult_hsdpa_users_df["avg_users"] = max_simult_hsdpa_users_df.mean(axis=1)
# TX Congestion
iub_frameloss_df = pivoted_kpi_dfs["IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71"]
hsdpa_congestion_rate_iub_df = pivoted_kpi_dfs["HSDPA_congestion_rate_in_Iub"]
iub_frameloss_df = analyze_fails_kpi(
df=iub_frameloss_df,
number_of_kpi_days=num_last_days,
number_of_threshold_days=num_threshold_days,
kpi_threshold=iub_frameloss_threshold,
kpi_column_name="iub_frameloss",
)
hsdpa_congestion_rate_iub_df = analyze_fails_kpi(
df=hsdpa_congestion_rate_iub_df,
number_of_kpi_days=num_last_days,
number_of_threshold_days=num_threshold_days,
kpi_threshold=hsdpa_congestion_rate_iub_threshold,
kpi_column_name="hsdpa_congestion_rate_iub",
)
# Fails
rrc_conn_stp_fail_ac_df = analyze_fails_kpi(
df=pivoted_kpi_dfs["rrc_conn_stp_fail_ac_M1001C3"],
number_of_kpi_days=num_last_days,
number_of_threshold_days=num_threshold_days,
kpi_threshold=fails_treshold,
kpi_column_name="rrc_fail_ac",
)
rrc_conn_stp_fail_ac_ul_df = analyze_fails_kpi(
df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_UL_M1001C731"],
number_of_kpi_days=num_last_days,
number_of_threshold_days=num_threshold_days,
kpi_threshold=fails_treshold,
kpi_column_name="rrc_fail_ac_ul",
)
rrc_conn_stp_fail_ac_dl_df = analyze_fails_kpi(
df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_DL_M1001C732"],
number_of_kpi_days=num_last_days,
number_of_threshold_days=num_threshold_days,
kpi_threshold=fails_treshold,
kpi_column_name="rrc_fail_ac_dl",
)
rrc_conn_stp_fail_ac_cod_df = analyze_fails_kpi(
df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_COD_M1001C733"],
number_of_kpi_days=num_last_days,
number_of_threshold_days=num_threshold_days,
kpi_threshold=fails_treshold,
kpi_column_name="rrc_fail_code",
)
rrc_conn_stp_fail_bts_df = analyze_fails_kpi(
df=pivoted_kpi_dfs["rrc_conn_stp_fail_bts_M1001C4"],
number_of_kpi_days=num_last_days,
number_of_threshold_days=num_threshold_days,
kpi_threshold=fails_treshold,
kpi_column_name="rrc_fail_bts",
)
kpi_df = pd.concat(
[
cell_availability_df,
trafic_cs_df,
hsdpa_traffic_df,
hsdpa_user_throughput_df,
max_simult_hsdpa_users_df,
iub_frameloss_df,
hsdpa_congestion_rate_iub_df,
rrc_conn_stp_fail_ac_df,
rrc_conn_stp_fail_ac_ul_df,
rrc_conn_stp_fail_ac_dl_df,
rrc_conn_stp_fail_ac_cod_df,
rrc_conn_stp_fail_bts_df,
],
axis=1,
)
kpi_df = kpi_df.reset_index()
kpi_df = combine_comments(
kpi_df,
"iub_frameloss_comment",
"hsdpa_congestion_rate_iub_comment",
new_column="tx_congestion_comments",
)
kpi_df["tx_congestion_comments"] = kpi_df["tx_congestion_comments"].apply(
lambda x: tx_comments_mapping.get(x, x)
)
kpi_df = combine_comments(
kpi_df,
"tx_congestion_comments",
"availability_comment_daily",
new_column="operational_comments",
)
kpi_df["operational_comments"] = kpi_df["operational_comments"].apply(
lambda x: operational_comments_mapping.get(x, x)
)
kpi_df = combine_comments(
kpi_df,
"rrc_fail_ac_comment",
"rrc_fail_ac_ul_comment",
"rrc_fail_ac_dl_comment",
"rrc_fail_code_comment",
"rrc_fail_bts_comment",
new_column="fails_comments",
)
kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(summarize_fails_comments)
kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(
lambda x: fails_comments_mapping.get(x, x)
)
kpi_df = combine_comments(
kpi_df,
"operational_comments",
"fails_comments",
new_column="final_comments",
)
wcel_analysis_df = kpi_df[WCEL_ANALYSIS_COLUMNS]
wcel_analysis_df = wcel_analysis_df.droplevel(level=1, axis=1)
# Rename
wcel_analysis_df = wcel_analysis_df.rename(
columns={
"WCEL_name": "name",
"Average_cell_availability_daily": "Avg_availability",
"number_of_days_exceeding_availability_threshold_daily": "Avail_exceed_days",
"availability_comment_daily": "availability_comment",
"number_of_days_with_iub_frameloss_exceeded": "iub_frameloss_exceed_days",
"number_of_days_with_hsdpa_congestion_rate_iub_exceeded": "hsdpa_iub_exceed_days",
"number_of_days_with_rrc_fail_ac_exceeded": "ac_fail_exceed_days",
"number_of_days_with_rrc_fail_ac_ul_exceeded": "ac_ul_fail_exceed_days",
"number_of_days_with_rrc_fail_ac_dl_exceeded": "ac_dl_fail_exceed_days",
"number_of_days_with_rrc_fail_code_exceeded": "code_fail_exceed_days",
"number_of_days_with_rrc_fail_bts_exceeded": "bts_fail_exceed_days",
}
)
# remove row if name less than 5 characters
wcel_analysis_df = wcel_analysis_df[wcel_analysis_df["name"].str.len() >= 5]
wcel_analysis_df["code"] = wcel_analysis_df["name"].str.split("_").str[0]
wcel_analysis_df["code"] = (
pd.to_numeric(wcel_analysis_df["code"], errors="coerce").fillna(0).astype(int)
)
wcel_analysis_df["Region"] = wcel_analysis_df["name"].str.split("_").str[1]
# move code to the first column
wcel_analysis_df = wcel_analysis_df[
["code", "Region"]
+ [col for col in wcel_analysis_df if col != "code" and col != "Region"]
]
# Load physical database
physical_db: pd.DataFrame = get_physical_db()
# Convert code_sector to code
physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
# remove duplicates
physical_db = physical_db.drop_duplicates(subset="code")
# keep only code and longitude and latitude
physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
physical_db["code"] = (
pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
)
wcel_analysis_df = pd.merge(
wcel_analysis_df,
physical_db,
on="code",
how="left",
)
return [wcel_analysis_df, kpi_df]
def load_and_process_wcel_capacity_data(
uploaded_file: pd.DataFrame,
num_last_days: int,
num_threshold_days: int,
availability_threshold: int,
iub_frameloss_threshold: int,
hsdpa_congestion_rate_iub_threshold: int,
fails_treshold: int,
) -> pd.DataFrame:
"""
Load and process data for WCEL capacity analysis.
Args:
uploaded_file: Uploaded CSV file containing WCEL capacity data
num_last_days: Number of days for analysis
num_threshold_days: Minimum days above threshold to flag for upgrade
availability_threshold: Utilization threshold percentage for flagging
iub_frameloss_threshold: Utilization threshold percentage for flagging
hsdpa_congestion_rate_iub_threshold: Utilization threshold percentage for flagging
fails_treshold: Utilization threshold percentage for flagging
Returns:
Processed DataFrame with WCEL capacity analysis results
"""
# Load data
df = pd.read_csv(uploaded_file, delimiter=";")
df = kpi_naming_cleaning(df)
df = create_daily_date(df)
df = df[KPI_COLUMNS]
dfs = wcel_kpi_analysis(
df,
num_last_days,
num_threshold_days,
availability_threshold,
iub_frameloss_threshold,
hsdpa_congestion_rate_iub_threshold,
fails_treshold,
)
return dfs