|
import pandas as pd |
|
|
|
from utils.kpi_analysis_utils import ( |
|
cell_availability_analysis, |
|
combine_comments, |
|
create_daily_date, |
|
create_dfs_per_kpi, |
|
kpi_naming_cleaning, |
|
) |
|
|
|
|
|
class WbtsCapacity: |
|
final_results: pd.DataFrame = None |
|
|
|
|
|
def check_deviation(row: pd.Series, max_diff: float = 3.0, type: str = "") -> str: |
|
""" |
|
Check if any value in the row deviates more than max_diff from the most common value. |
|
|
|
Args: |
|
row: Series of values to check for deviation |
|
max_diff: Maximum allowed difference from the most common value |
|
type: Type identifier for the deviation message |
|
|
|
Returns: |
|
A message indicating deviation if found, otherwise an empty string |
|
""" |
|
numeric_row = row.astype(float) |
|
mode_series = numeric_row.mode() |
|
|
|
|
|
most_common = mode_series.iloc[0] if not mode_series.empty else numeric_row.iloc[0] |
|
|
|
diffs = abs(numeric_row - most_common) |
|
|
|
if (diffs > max_diff).any(): |
|
return f"{type} Deviation > {max_diff} detected" |
|
else: |
|
return "" |
|
|
|
|
|
def max_used_bb_subunits_analysis( |
|
df: pd.DataFrame, |
|
days: int = 7, |
|
threshold: int = 80, |
|
number_of_threshold_days: int = 3, |
|
) -> pd.DataFrame: |
|
""" |
|
Analyze maximum used baseband subunits and identify sites needing upgrades. |
|
|
|
Args: |
|
df: DataFrame containing baseband utilization data |
|
days: Number of days to analyze |
|
threshold: Utilization threshold percentage for flagging |
|
number_of_threshold_days: Minimum days above threshold to flag for upgrade |
|
|
|
Returns: |
|
DataFrame with analysis results and upgrade recommendations |
|
""" |
|
result_df = df.copy() |
|
last_days_df = result_df.iloc[:, -days:] |
|
last_days_df = last_days_df.fillna(0) |
|
|
|
result_df["Average_used_bb_ratio"] = last_days_df.mean(axis=1).round(2) |
|
|
|
result_df["bb_number_of_days_exceeding_threshold"] = last_days_df.apply( |
|
lambda row: sum(1 for x in row if x >= threshold), axis=1 |
|
) |
|
|
|
|
|
result_df["Average_used_bb_ratio_comment"] = "" |
|
|
|
|
|
result_df.loc[ |
|
(result_df["bb_number_of_days_exceeding_threshold"] >= number_of_threshold_days) |
|
& (result_df["Average_used_bb_ratio"] >= threshold), |
|
"Average_used_bb_ratio_comment", |
|
] = "need BB upgrade" |
|
|
|
return result_df |
|
|
|
|
|
def max_used_ce_analysis( |
|
df: pd.DataFrame, |
|
days: int = 7, |
|
threshold: int = 80, |
|
number_of_threshold_days: int = 3, |
|
) -> pd.DataFrame: |
|
""" |
|
Analyze maximum used channel elements and identify sites needing upgrades. |
|
|
|
Args: |
|
df: DataFrame containing channel element utilization data |
|
days: Number of days to analyze |
|
threshold: Utilization threshold percentage for flagging |
|
number_of_threshold_days: Minimum days above threshold to flag for upgrade |
|
|
|
Returns: |
|
DataFrame with analysis results and upgrade recommendations |
|
""" |
|
result_df = df.copy().fillna(0) |
|
last_days_df = result_df.iloc[:, -days:] |
|
|
|
result_df["Average_used_ce_ratio"] = last_days_df.mean(axis=1).round(2) |
|
|
|
|
|
result_df["ce_number_of_days_exceeding_threshold"] = last_days_df.apply( |
|
lambda row: sum(1 for x in row if x >= threshold), axis=1 |
|
) |
|
|
|
|
|
result_df["Average_used_ce_ratio_comment"] = "" |
|
|
|
|
|
result_df.loc[ |
|
(result_df["ce_number_of_days_exceeding_threshold"] >= number_of_threshold_days) |
|
& (result_df["Average_used_ce_ratio"] >= threshold), |
|
"Average_used_ce_ratio_comment", |
|
] = "need CE upgrade" |
|
|
|
return result_df |
|
|
|
|
|
def num_bb_subunits_analysis(df: pd.DataFrame, days: int = 3) -> pd.DataFrame: |
|
""" |
|
Analyze baseband subunit count for deviations. |
|
|
|
Args: |
|
df: DataFrame containing baseband subunit count data |
|
days: Number of days to analyze |
|
|
|
Returns: |
|
DataFrame with deviation analysis comments |
|
""" |
|
result_df = df.copy() |
|
last_days_df = result_df.iloc[:, -days:] |
|
result_df["num_bb_subunits_comment"] = last_days_df.apply( |
|
lambda row: check_deviation(row, type="bb"), axis=1 |
|
) |
|
return result_df |
|
|
|
|
|
def avail_ce_analysis(df: pd.DataFrame, days: int = 7) -> pd.DataFrame: |
|
""" |
|
Analyze available channel elements for deviations. |
|
|
|
Args: |
|
df: DataFrame containing available channel element data |
|
days: Number of days to analyze |
|
|
|
Returns: |
|
DataFrame with deviation analysis comments |
|
""" |
|
result_df = df.copy() |
|
last_days_df = result_df.iloc[:, -days:] |
|
result_df["avail_ce_comment"] = last_days_df.apply( |
|
lambda row: check_deviation(row, max_diff=96, type="ce"), axis=1 |
|
) |
|
return result_df |
|
|
|
|
|
def bb_comments_analysis(df: pd.DataFrame) -> pd.DataFrame: |
|
""" |
|
Combine baseband related comments into a single column. |
|
|
|
Args: |
|
df: DataFrame containing baseband comment columns |
|
|
|
Returns: |
|
DataFrame with combined baseband comments |
|
""" |
|
return combine_comments( |
|
df, |
|
"num_bb_subunits_comment", |
|
"Average_used_bb_ratio_comment", |
|
"availability_comment_daily", |
|
new_column="bb_comments", |
|
) |
|
|
|
|
|
def ce_comments_analysis(df: pd.DataFrame) -> pd.DataFrame: |
|
""" |
|
Combine channel element related comments into a single column. |
|
|
|
Args: |
|
df: DataFrame containing channel element comment columns |
|
|
|
Returns: |
|
DataFrame with combined channel element comments |
|
""" |
|
return combine_comments( |
|
df, |
|
"avail_ce_comment", |
|
"Average_used_ce_ratio_comment", |
|
"availability_comment_daily", |
|
new_column="ce_comments", |
|
) |
|
|
|
|
|
def wbts_kpi_analysis( |
|
df: pd.DataFrame, |
|
num_days: int = 7, |
|
threshold: int = 80, |
|
number_of_threshold_days: int = 3, |
|
) -> pd.DataFrame: |
|
""" |
|
Create pivoted DataFrames for each KPI and perform analysis. |
|
|
|
Args: |
|
df: DataFrame containing KPI data |
|
num_days: Number of days to analyze |
|
threshold: Utilization threshold percentage for flagging |
|
number_of_threshold_days: Minimum days above threshold to flag for upgrade |
|
|
|
Returns: |
|
DataFrame with combined analysis results |
|
""" |
|
|
|
pivoted_kpi_dfs = {} |
|
|
|
pivoted_kpi_dfs = create_dfs_per_kpi( |
|
df=df, pivot_date_column="date", pivot_name_column="DN", kpi_columns_from=5 |
|
) |
|
|
|
|
|
wbts_name_df = pivoted_kpi_dfs["WBTS_name"].iloc[:, 0] |
|
licensed_ce_df = pivoted_kpi_dfs["LICENSED_R99CE_WBTS_M5008C48"] |
|
max_used_ce_dl_df = pivoted_kpi_dfs["MAX_USED_CE_R99_DL_M5008C12"] |
|
max_used_ce_ul_df = pivoted_kpi_dfs["MAX_USED_CE_R99_UL_M5008C15"] |
|
max_avail_ce_df = pivoted_kpi_dfs["MAX_AVAIL_R99_CE_M5006C0"] |
|
max_used_bb_subunits_df = pivoted_kpi_dfs["MAX_USED_BB_SUBUNITS_M5008C38"] |
|
num_bb_subunits_df = pivoted_kpi_dfs["NUM_BB_SUBUNITS_M5008C39"] |
|
max_bb_sus_util_ratio_df = pivoted_kpi_dfs["Max_BB_SUs_Util_ratio"] |
|
cell_availability_df = pivoted_kpi_dfs[ |
|
"Cell_Availability_excluding_blocked_by_user_state_BLU" |
|
] |
|
total_cs_traffic_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"] |
|
total_data_traffic_df = pivoted_kpi_dfs["Total_Data_Traffic"] |
|
max_used_ce_ratio_flexi_df = pivoted_kpi_dfs["Max_Used_CE_s_ratio_Flexi_R2"] |
|
|
|
|
|
max_bb_sus_util_ratio_df = max_used_bb_subunits_analysis( |
|
max_bb_sus_util_ratio_df, num_days, threshold, number_of_threshold_days |
|
) |
|
cell_availability_df = cell_availability_analysis(cell_availability_df, num_days) |
|
max_used_ce_ratio_flexi_df = max_used_ce_analysis( |
|
max_used_ce_ratio_flexi_df, num_days, threshold, number_of_threshold_days |
|
) |
|
num_bb_subunits_df = num_bb_subunits_analysis(num_bb_subunits_df, num_days) |
|
licensed_ce_df = avail_ce_analysis(licensed_ce_df, num_days) |
|
|
|
|
|
result_df = pd.concat( |
|
[ |
|
wbts_name_df, |
|
licensed_ce_df, |
|
max_used_ce_dl_df, |
|
max_used_ce_ul_df, |
|
max_avail_ce_df, |
|
max_used_bb_subunits_df, |
|
num_bb_subunits_df, |
|
max_bb_sus_util_ratio_df, |
|
cell_availability_df, |
|
total_cs_traffic_df, |
|
total_data_traffic_df, |
|
max_used_ce_ratio_flexi_df, |
|
], |
|
axis=1, |
|
) |
|
|
|
|
|
result_df = bb_comments_analysis(result_df) |
|
result_df = ce_comments_analysis(result_df) |
|
|
|
return result_df |
|
|
|
|
|
def load_data( |
|
filepath: str, |
|
num_days: int, |
|
threshold: int, |
|
number_of_threshold_days: int, |
|
) -> pd.DataFrame: |
|
""" |
|
Load data from CSV file and perform preprocessing and analysis. |
|
|
|
Args: |
|
filepath: Path to CSV file or uploaded file object |
|
num_days: Number of days to analyze |
|
threshold: Utilization threshold percentage for flagging |
|
number_of_threshold_days: Minimum days above threshold to flag for upgrade |
|
|
|
Returns: |
|
DataFrame with processed and analyzed data |
|
""" |
|
df = pd.read_csv(filepath, delimiter=";") |
|
|
|
|
|
df = create_daily_date(df) |
|
df = kpi_naming_cleaning(df) |
|
|
|
|
|
df = df[["date"] + [col for col in df.columns if col not in ["date"]]] |
|
df = df[[col for col in df.columns if col != "WBTS_name"] + ["WBTS_name"]] |
|
|
|
|
|
df = wbts_kpi_analysis(df, num_days, threshold, number_of_threshold_days) |
|
|
|
|
|
|
|
|
|
return df |
|
|