import pandas as pd from utils.kpi_analysis_utils import ( analyze_fails_kpi, cell_availability_analysis, combine_comments, create_daily_date, create_dfs_per_kpi, kpi_naming_cleaning, summarize_fails_comments, ) from utils.utils_vars import get_physical_db tx_comments_mapping = { "iub_frameloss exceeded threshold": "iub frameloss", "iub_frameloss exceeded threshold, hsdpa_congestion_rate_iub exceeded threshold": "iub frameloss and hsdpa iub congestion", "hsdpa_congestion_rate_iub exceeded threshold": "hsdpa iub congestion", } operational_comments_mapping = { "Down Site": "Down Cell", "iub frameloss, instability": "Availability and TX issues", "iub frameloss and hsdpa iub congestion, Availability OK": "TX issues", "iub frameloss, Availability OK": "TX issues", "critical instability": "Availability issues", "iub frameloss, critical instability": "Availability and TX issues", "iub frameloss and hsdpa iub congestion, instability": "Availability and TX issues", "Availability OK": "Site OK", "hsdpa iub congestion, instability": "Availability and TX issues", "instability": "Availability issues", "hsdpa iub congestion, Availability OK": "TX issues", "iub frameloss and hsdpa iub congestion, critical instability": "Availability and TX issues", "hsdpa iub congestion, critical instability": "Availability and TX issues", } fails_comments_mapping = { "ac, ac_dl, bts, code fails": "Power, Bts and Code fails", "bts fails": "Bts fails", "ac, bts, code fails": "Power and Code fails", "ac, code fails": "Power fails", "ac fails": "Power fails", "ac, ac_dl fails": "Power fails", "ac, bts fails": "Power and Bts fails", "ac, ac_dl, bts fails": "Power and Bts fails", "ac, ac_dl, code fails": "Power and Code fails", "ac, ac_ul, bts, code fails": "Power, Bts and Code fails", "ac, ac_dl, ac_ul, bts, code fails": "Power, Bts and Code fails", } KPI_COLUMNS = [ "WCEL_name", "date", "Cell_Availability_excluding_blocked_by_user_state_BLU", "Total_CS_traffic_Erl", "HSDPA_TRAFFIC_VOLUME", "HSDPA_USER_THROUGHPUT", "Max_simult_HSDPA_users", "IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71", "HSDPA_congestion_rate_in_Iub", "rrc_conn_stp_fail_ac_M1001C3", "RRC_CONN_STP_FAIL_AC_UL_M1001C731", "RRC_CONN_STP_FAIL_AC_DL_M1001C732", "RRC_CONN_STP_FAIL_AC_COD_M1001C733", "rrc_conn_stp_fail_bts_M1001C4", ] WCEL_ANALYSIS_COLUMNS = [ "WCEL_name", "Average_cell_availability_daily", "number_of_days_exceeding_availability_threshold_daily", "availability_comment_daily", "sum_traffic_cs", "sum_traffic_dl", "max_dl_throughput", "avg_dl_throughput", "max_users", "max_iub_frameloss", "number_of_days_with_iub_frameloss_exceeded", "max_hsdpa_congestion_rate_iub", "number_of_days_with_hsdpa_congestion_rate_iub_exceeded", "max_rrc_fail_ac", "number_of_days_with_rrc_fail_ac_exceeded", "max_rrc_fail_ac_ul", "number_of_days_with_rrc_fail_ac_ul_exceeded", "max_rrc_fail_ac_dl", "number_of_days_with_rrc_fail_ac_dl_exceeded", "max_rrc_fail_code", "number_of_days_with_rrc_fail_code_exceeded", "max_rrc_fail_bts", "number_of_days_with_rrc_fail_bts_exceeded", "tx_congestion_comments", "operational_comments", "fails_comments", "final_comments", ] class WcelCapacity: final_results: pd.DataFrame = None def wcel_kpi_analysis( df: pd.DataFrame, num_last_days: int, num_threshold_days: int, availability_threshold: int, iub_frameloss_threshold: int, hsdpa_congestion_rate_iub_threshold: int, fails_treshold: int, ) -> pd.DataFrame: pivoted_kpi_dfs = create_dfs_per_kpi( df=df, pivot_date_column="date", pivot_name_column="WCEL_name", kpi_columns_from=2, ) cell_availability_df = cell_availability_analysis( df=pivoted_kpi_dfs["Cell_Availability_excluding_blocked_by_user_state_BLU"], days=num_last_days, availability_threshold=availability_threshold, ) # Trafics, throughput and max users trafic_cs_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"] hsdpa_traffic_df = pivoted_kpi_dfs["HSDPA_TRAFFIC_VOLUME"] hsdpa_user_throughput_df = pivoted_kpi_dfs["HSDPA_USER_THROUGHPUT"] max_simult_hsdpa_users_df = pivoted_kpi_dfs["Max_simult_HSDPA_users"] # Add Max of Trafics, throughput and max users trafic_cs_df["sum_traffic_cs"] = trafic_cs_df.sum(axis=1) hsdpa_traffic_df["sum_traffic_dl"] = hsdpa_traffic_df.sum(axis=1) hsdpa_user_throughput_df["max_dl_throughput"] = hsdpa_user_throughput_df.max(axis=1) max_simult_hsdpa_users_df["max_users"] = max_simult_hsdpa_users_df.max(axis=1) # add average of Trafics, throughput and max users hsdpa_user_throughput_df["avg_dl_throughput"] = hsdpa_user_throughput_df.mean( axis=1 ) max_simult_hsdpa_users_df["avg_users"] = max_simult_hsdpa_users_df.mean(axis=1) # TX Congestion iub_frameloss_df = pivoted_kpi_dfs["IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71"] hsdpa_congestion_rate_iub_df = pivoted_kpi_dfs["HSDPA_congestion_rate_in_Iub"] iub_frameloss_df = analyze_fails_kpi( df=iub_frameloss_df, number_of_kpi_days=num_last_days, number_of_threshold_days=num_threshold_days, kpi_threshold=iub_frameloss_threshold, kpi_column_name="iub_frameloss", ) hsdpa_congestion_rate_iub_df = analyze_fails_kpi( df=hsdpa_congestion_rate_iub_df, number_of_kpi_days=num_last_days, number_of_threshold_days=num_threshold_days, kpi_threshold=hsdpa_congestion_rate_iub_threshold, kpi_column_name="hsdpa_congestion_rate_iub", ) # Fails rrc_conn_stp_fail_ac_df = analyze_fails_kpi( df=pivoted_kpi_dfs["rrc_conn_stp_fail_ac_M1001C3"], number_of_kpi_days=num_last_days, number_of_threshold_days=num_threshold_days, kpi_threshold=fails_treshold, kpi_column_name="rrc_fail_ac", ) rrc_conn_stp_fail_ac_ul_df = analyze_fails_kpi( df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_UL_M1001C731"], number_of_kpi_days=num_last_days, number_of_threshold_days=num_threshold_days, kpi_threshold=fails_treshold, kpi_column_name="rrc_fail_ac_ul", ) rrc_conn_stp_fail_ac_dl_df = analyze_fails_kpi( df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_DL_M1001C732"], number_of_kpi_days=num_last_days, number_of_threshold_days=num_threshold_days, kpi_threshold=fails_treshold, kpi_column_name="rrc_fail_ac_dl", ) rrc_conn_stp_fail_ac_cod_df = analyze_fails_kpi( df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_COD_M1001C733"], number_of_kpi_days=num_last_days, number_of_threshold_days=num_threshold_days, kpi_threshold=fails_treshold, kpi_column_name="rrc_fail_code", ) rrc_conn_stp_fail_bts_df = analyze_fails_kpi( df=pivoted_kpi_dfs["rrc_conn_stp_fail_bts_M1001C4"], number_of_kpi_days=num_last_days, number_of_threshold_days=num_threshold_days, kpi_threshold=fails_treshold, kpi_column_name="rrc_fail_bts", ) kpi_df = pd.concat( [ cell_availability_df, trafic_cs_df, hsdpa_traffic_df, hsdpa_user_throughput_df, max_simult_hsdpa_users_df, iub_frameloss_df, hsdpa_congestion_rate_iub_df, rrc_conn_stp_fail_ac_df, rrc_conn_stp_fail_ac_ul_df, rrc_conn_stp_fail_ac_dl_df, rrc_conn_stp_fail_ac_cod_df, rrc_conn_stp_fail_bts_df, ], axis=1, ) kpi_df = kpi_df.reset_index() kpi_df = combine_comments( kpi_df, "iub_frameloss_comment", "hsdpa_congestion_rate_iub_comment", new_column="tx_congestion_comments", ) kpi_df["tx_congestion_comments"] = kpi_df["tx_congestion_comments"].apply( lambda x: tx_comments_mapping.get(x, x) ) kpi_df = combine_comments( kpi_df, "tx_congestion_comments", "availability_comment_daily", new_column="operational_comments", ) kpi_df["operational_comments"] = kpi_df["operational_comments"].apply( lambda x: operational_comments_mapping.get(x, x) ) kpi_df = combine_comments( kpi_df, "rrc_fail_ac_comment", "rrc_fail_ac_ul_comment", "rrc_fail_ac_dl_comment", "rrc_fail_code_comment", "rrc_fail_bts_comment", new_column="fails_comments", ) kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(summarize_fails_comments) kpi_df["fails_comments"] = kpi_df["fails_comments"].apply( lambda x: fails_comments_mapping.get(x, x) ) kpi_df = combine_comments( kpi_df, "operational_comments", "fails_comments", new_column="final_comments", ) wcel_analysis_df = kpi_df[WCEL_ANALYSIS_COLUMNS] wcel_analysis_df = wcel_analysis_df.droplevel(level=1, axis=1) # Rename wcel_analysis_df = wcel_analysis_df.rename( columns={ "WCEL_name": "name", "Average_cell_availability_daily": "Avg_availability", "number_of_days_exceeding_availability_threshold_daily": "Avail_exceed_days", "availability_comment_daily": "availability_comment", "number_of_days_with_iub_frameloss_exceeded": "iub_frameloss_exceed_days", "number_of_days_with_hsdpa_congestion_rate_iub_exceeded": "hsdpa_iub_exceed_days", "number_of_days_with_rrc_fail_ac_exceeded": "ac_fail_exceed_days", "number_of_days_with_rrc_fail_ac_ul_exceeded": "ac_ul_fail_exceed_days", "number_of_days_with_rrc_fail_ac_dl_exceeded": "ac_dl_fail_exceed_days", "number_of_days_with_rrc_fail_code_exceeded": "code_fail_exceed_days", "number_of_days_with_rrc_fail_bts_exceeded": "bts_fail_exceed_days", } ) # remove row if name less than 5 characters wcel_analysis_df = wcel_analysis_df[wcel_analysis_df["name"].str.len() >= 5] wcel_analysis_df["code"] = wcel_analysis_df["name"].str.split("_").str[0] wcel_analysis_df["code"] = ( pd.to_numeric(wcel_analysis_df["code"], errors="coerce").fillna(0).astype(int) ) wcel_analysis_df["Region"] = wcel_analysis_df["name"].str.split("_").str[1] # move code to the first column wcel_analysis_df = wcel_analysis_df[ ["code", "Region"] + [col for col in wcel_analysis_df if col != "code" and col != "Region"] ] # Load physical database physical_db: pd.DataFrame = get_physical_db() # Convert code_sector to code physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0] # remove duplicates physical_db = physical_db.drop_duplicates(subset="code") # keep only code and longitude and latitude physical_db = physical_db[["code", "Longitude", "Latitude", "City"]] physical_db["code"] = ( pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int) ) wcel_analysis_df = pd.merge( wcel_analysis_df, physical_db, on="code", how="left", ) return [wcel_analysis_df, kpi_df] def load_and_process_wcel_capacity_data( uploaded_file: pd.DataFrame, num_last_days: int, num_threshold_days: int, availability_threshold: int, iub_frameloss_threshold: int, hsdpa_congestion_rate_iub_threshold: int, fails_treshold: int, ) -> pd.DataFrame: """ Load and process data for WCEL capacity analysis. Args: uploaded_file: Uploaded CSV file containing WCEL capacity data num_last_days: Number of days for analysis num_threshold_days: Minimum days above threshold to flag for upgrade availability_threshold: Utilization threshold percentage for flagging iub_frameloss_threshold: Utilization threshold percentage for flagging hsdpa_congestion_rate_iub_threshold: Utilization threshold percentage for flagging fails_treshold: Utilization threshold percentage for flagging Returns: Processed DataFrame with WCEL capacity analysis results """ # Load data df = pd.read_csv(uploaded_file, delimiter=";") df = kpi_naming_cleaning(df) df = create_daily_date(df) df = df[KPI_COLUMNS] dfs = wcel_kpi_analysis( df, num_last_days, num_threshold_days, availability_threshold, iub_frameloss_threshold, hsdpa_congestion_rate_iub_threshold, fails_treshold, ) return dfs