import pandas as pd from utils.kpi_analysis_utils import ( cell_availability_analysis, combine_comments, create_daily_date, create_dfs_per_kpi, kpi_naming_cleaning, ) class WbtsCapacity: final_results: pd.DataFrame = None def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> str: """ Check if any value in the row deviates more than max_diff from the most common value. Args: row: Series of values to check for deviation max_diff: Maximum allowed difference from the most common value type: Type identifier for the deviation message Returns: A message indicating deviation if found, otherwise an empty string """ numeric_row = row.astype(float) # Ensure numeric mode_series = numeric_row.mode() # Safe fallback in case mode is empty most_common = mode_series.iloc[0] if not mode_series.empty else numeric_row.iloc[0] diffs = abs(numeric_row - most_common) if (diffs > max_diff).any(): return f"{type} Deviation > {max_diff} detected" else: return "" def max_used_bb_subunits_analysis( df: pd.DataFrame, days: int = 7, threshold: int = 80, number_of_threshold_days: int = 3, ) -> pd.DataFrame: """ Analyze maximum used baseband subunits and identify sites needing upgrades. Args: df: DataFrame containing baseband utilization data days: Number of days to analyze threshold: Utilization threshold percentage for flagging number_of_threshold_days: Minimum days above threshold to flag for upgrade Returns: DataFrame with analysis results and upgrade recommendations """ result_df = df.copy() last_days_df = result_df.iloc[:, -days:] last_days_df = last_days_df.fillna(0) result_df["Average_used_bb_ratio"] = last_days_df.mean(axis=1).round(2) # Count the number of days above threshold result_df["bb_number_of_days_exceeding_threshold"] = last_days_df.apply( lambda row: sum(1 for x in row if x >= threshold), axis=1 ) # Initialize comment column result_df["Average_used_bb_ratio_comment"] = "" # Apply condition for upgrade recommendation result_df.loc[ (result_df["bb_number_of_days_exceeding_threshold"] >= number_of_threshold_days) & (result_df["Average_used_bb_ratio"] >= threshold), "Average_used_bb_ratio_comment", ] = "need BB upgrade" return result_df def max_used_ce_analysis( df: pd.DataFrame, days: int = 7, threshold: int = 80, number_of_threshold_days: int = 3, ) -> pd.DataFrame: """ Analyze maximum used channel elements and identify sites needing upgrades. Args: df: DataFrame containing channel element utilization data days: Number of days to analyze threshold: Utilization threshold percentage for flagging number_of_threshold_days: Minimum days above threshold to flag for upgrade Returns: DataFrame with analysis results and upgrade recommendations """ result_df = df.copy().fillna(0) last_days_df = result_df.iloc[:, -days:] result_df["Average_used_ce_ratio"] = last_days_df.mean(axis=1).round(2) # Count the number of days above threshold result_df["ce_number_of_days_exceeding_threshold"] = last_days_df.apply( lambda row: sum(1 for x in row if x >= threshold), axis=1 ) # Initialize comment column result_df["Average_used_ce_ratio_comment"] = "" # Apply condition for upgrade recommendation result_df.loc[ (result_df["ce_number_of_days_exceeding_threshold"] >= number_of_threshold_days) & (result_df["Average_used_ce_ratio"] >= threshold), "Average_used_ce_ratio_comment", ] = "need CE upgrade" return result_df def num_bb_subunits_analysis(df: pd.DataFrame, days: int = 3) -> pd.DataFrame: """ Analyze baseband subunit count for deviations. Args: df: DataFrame containing baseband subunit count data days: Number of days to analyze Returns: DataFrame with deviation analysis comments """ result_df = df.copy() last_days_df = result_df.iloc[:, -days:] result_df["num_bb_subunits_comment"] = last_days_df.apply( lambda row: check_deviation(row, type="bb"), axis=1 ) return result_df def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame: """ Analyze available channel elements for deviations. Args: df: DataFrame containing available channel element data days: Number of days to analyze Returns: DataFrame with deviation analysis comments """ result_df = df.copy() last_days_df = result_df.iloc[:, -days:] result_df["avail_ce_comment"] = last_days_df.apply( lambda row: check_deviation(row, max_diff=96, type="ce"), axis=1 ) return result_df def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame: """ Combine baseband related comments into a single column. Args: df: DataFrame containing baseband comment columns Returns: DataFrame with combined baseband comments """ return combine_comments( df, "num_bb_subunits_comment", "Average_used_bb_ratio_comment", "availability_comment_daily", new_column="bb_comments", ) def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame: """ Combine channel element related comments into a single column. Args: df: DataFrame containing channel element comment columns Returns: DataFrame with combined channel element comments """ return combine_comments( df, "avail_ce_comment", "Average_used_ce_ratio_comment", "availability_comment_daily", new_column="ce_comments", ) def wbts_kpi_analysis( df: pd.DataFrame, num_days: int = 7, threshold: int = 80, number_of_threshold_days: int = 3, ) -> pd.DataFrame: """ Create pivoted DataFrames for each KPI and perform analysis. Args: df: DataFrame containing KPI data num_days: Number of days to analyze threshold: Utilization threshold percentage for flagging number_of_threshold_days: Minimum days above threshold to flag for upgrade Returns: DataFrame with combined analysis results """ # kpi_columns = df.columns[5:] pivoted_kpi_dfs = {} pivoted_kpi_dfs = create_dfs_per_kpi( df=df, pivot_date_column="date", pivot_name_column="DN", kpi_columns_from=5 ) # Extract individual KPI DataFrames wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0] licensed_ce_df = pivoted_kpi_dfs["LICENSED_R99CE_WBTS_M5008C48"] max_used_ce_dl_df = pivoted_kpi_dfs["MAX_USED_CE_R99_DL_M5008C12"] max_used_ce_ul_df = pivoted_kpi_dfs["MAX_USED_CE_R99_UL_M5008C15"] max_avail_ce_df = pivoted_kpi_dfs["MAX_AVAIL_R99_CE_M5006C0"] max_used_bb_subunits_df = pivoted_kpi_dfs["MAX_USED_BB_SUBUNITS_M5008C38"] num_bb_subunits_df = pivoted_kpi_dfs["NUM_BB_SUBUNITS_M5008C39"] max_bb_sus_util_ratio_df = pivoted_kpi_dfs["Max_BB_SUs_Util_ratio"] cell_availability_df = pivoted_kpi_dfs[ "Cell_Availability_excluding_blocked_by_user_state_BLU" ] total_cs_traffic_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"] total_data_traffic_df = pivoted_kpi_dfs["Total_Data_Traffic"] max_used_ce_ratio_flexi_df = pivoted_kpi_dfs["Max_Used_CE_s_ratio_Flexi_R2"] # Perform analysis on each KPI DataFrame max_bb_sus_util_ratio_df = max_used_bb_subunits_analysis( max_bb_sus_util_ratio_df, num_days, threshold, number_of_threshold_days ) cell_availability_df = cell_availability_analysis(cell_availability_df, num_days) max_used_ce_ratio_flexi_df = max_used_ce_analysis( max_used_ce_ratio_flexi_df, num_days, threshold, number_of_threshold_days ) num_bb_subunits_df = num_bb_subunits_analysis(num_bb_subunits_df, num_days) licensed_ce_df = avail_ce_analysis(licensed_ce_df, num_days) # Concatenate all DataFrames result_df = pd.concat( [ wbts_name_df, licensed_ce_df, max_used_ce_dl_df, max_used_ce_ul_df, max_avail_ce_df, max_used_bb_subunits_df, num_bb_subunits_df, max_bb_sus_util_ratio_df, cell_availability_df, total_cs_traffic_df, total_data_traffic_df, max_used_ce_ratio_flexi_df, ], axis=1, ) # Add combined comments analysis result_df = bb_comments_analysis(result_df) result_df = ce_comments_analysis(result_df) return result_df def load_data( filepath: str, num_days: int, threshold: int, number_of_threshold_days: int, ) -> pd.DataFrame: """ Load data from CSV file and perform preprocessing and analysis. Args: filepath: Path to CSV file or uploaded file object num_days: Number of days to analyze threshold: Utilization threshold percentage for flagging number_of_threshold_days: Minimum days above threshold to flag for upgrade Returns: DataFrame with processed and analyzed data """ df = pd.read_csv(filepath, delimiter=";") # Preprocess data df = create_daily_date(df) df = kpi_naming_cleaning(df) # Reorder columns for better organization df = df[["date"] + [col for col in df.columns if col not in ["date"]]] df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]] # Perform KPI analysis df = wbts_kpi_analysis(df, num_days, threshold, number_of_threshold_days) # for col, col_index in zip(df.columns, df.columns.get_indexer(df.columns)): # print(f"Column: {col}, Index: {col_index}") return df