# db_query/process_kpi/process_lte_capacity.py
# Author: DavMelchi
# Commit 26cb917: optimize lte capacity columns name
import numpy as np
import pandas as pd
from queries.process_lte import process_lte_data
from utils.convert_to_excel import save_dataframe
from utils.kpi_analysis_utils import (
LteCapacity,
analyze_prb_usage,
cell_availability_analysis,
create_dfs_per_kpi,
create_hourly_date,
kpi_naming_cleaning,
)
# Column order of the final per-sector capacity analysis report.
# Per-band columns use the suffixes l800/l1800/l2300/l2600/l1800s
# (the "s" variant is the second L1800 carrier); "_2nd" marks the
# secondary PRB counter.
LTE_ANALYSIS_COLUMNS = [
    # Site / sector identification and location
    "code",
    "code_sector",
    "Region",
    "site_config_band",
    "Longitude",
    "Latitude",
    # Cell names per band
    "LNCEL_name_l800",
    "LNCEL_name_l1800",
    "LNCEL_name_l2300",
    "LNCEL_name_l2600",
    "LNCEL_name_l1800s",
    # Busy-hour PRB usage per band (primary counter)
    "avg_prb_usage_bh_l800",
    "avg_prb_usage_bh_l1800",
    "avg_prb_usage_bh_l2300",
    "avg_prb_usage_bh_l2600",
    "avg_prb_usage_bh_l1800s",
    # Busy-hour PRB usage per band (secondary counter)
    "avg_prb_usage_bh_l800_2nd",
    "avg_prb_usage_bh_l1800_2nd",
    "avg_prb_usage_bh_l2300_2nd",
    "avg_prb_usage_bh_l2600_2nd",
    "avg_prb_usage_bh_l1800s_2nd",
    # Average active UEs per band
    "avg_act_ues_l800",
    "avg_act_ues_l1800",
    "avg_act_ues_l2300",
    "avg_act_ues_l2600",
    "avg_act_ues_l1800s",
    # Average DL throughput per band
    "avg_dl_thp_l800",
    "avg_dl_thp_l1800",
    "avg_dl_thp_l2300",
    "avg_dl_thp_l2600",
    "avg_dl_thp_l1800s",
    # Average UL throughput per band
    "avg_ul_thp_l800",
    "avg_ul_thp_l1800",
    "avg_ul_thp_l2300",
    "avg_ul_thp_l2600",
    "avg_ul_thp_l1800s",
    # Derived analysis columns (added by lte_analysis_logic)
    "num_congested_cells",
    "num_cells",
    "num_cell_with_kpi",
    "num_down_or_no_kpi_cells",
    "prb_diff_between_cells",
    "load_balance_required",
    "congestion_comment",
    "final_comments",
]
# Columns kept from the LTE dump database ("final_name" is later renamed
# to "LNCEL_name" for merging with the KPI data).
LTE_DATABASE_COLUMNS = [
    "code",
    "Region",
    "site_config_band",
    "final_name",
    "Longitude",
    "Latitude",
]
# Columns kept from the raw BH KPI report after cleaning.
KPI_COLUMNS = [
    "date",
    "LNCEL_name",
    "Cell_Avail_excl_BLU",
    "E_UTRAN_Avg_PRB_usage_per_TTI_DL",
    "DL_PRB_Util_p_TTI_Lev_10",
    "Avg_PDCP_cell_thp_UL",
    "Avg_PDCP_cell_thp_DL",
    "Avg_act_UEs_DL",
]
# Per-cell aggregate columns extracted from the BH KPI table before
# merging with the dump database.
PRB_COLUMNS = [
    "LNCEL_name",
    "avg_prb_usage_bh",
    "avg_prb_usage_bh_2nd",
    "avg_act_ues",
    "avg_dl_thp",
    "avg_ul_thp",
]
def lte_analysis_logic(
    df: pd.DataFrame,
    prb_usage_threshold: int,
    prb_diff_between_cells_threshold: int,
) -> pd.DataFrame:
    """Derive congestion status and recommended actions per site sector.

    Args:
        df: One row per (site, sector) with per-band KPI columns
            (``avg_prb_usage_bh_l*``, ``LNCEL_name_l*`` ...) as produced
            by ``dfs_per_band_cell``.
        prb_usage_threshold: PRB usage level at or above which a cell is
            counted as congested.
        prb_diff_between_cells_threshold: PRB usage spread between
            co-sector cells above which load balancing is recommended.

    Returns:
        Copy of ``df`` with the analysis columns added; rows without any
        cell name (no sector could be identified) are dropped.
    """
    out = df.copy()
    prb_cols = [
        "avg_prb_usage_bh_l800",
        "avg_prb_usage_bh_l1800",
        "avg_prb_usage_bh_l2300",
        "avg_prb_usage_bh_l2600",
        "avg_prb_usage_bh_l1800s",
    ]
    name_cols = [
        "LNCEL_name_l800",
        "LNCEL_name_l1800",
        "LNCEL_name_l2300",
        "LNCEL_name_l2600",
        "LNCEL_name_l1800s",
    ]
    # Congested cells per sector; NaN (no KPI) compares False and is not counted.
    out["num_congested_cells"] = (out[prb_cols] >= prb_usage_threshold).sum(axis=1)
    # Cells known from the dump vs. cells that actually reported KPIs.
    out["num_cells"] = out[name_cols].count(axis=1)
    out["num_cell_with_kpi"] = out[prb_cols].count(axis=1)
    out["num_down_or_no_kpi_cells"] = out["num_cells"] - out["num_cell_with_kpi"]
    # PRB usage spread across the sector's cells. Vectorized max/min skip
    # NaN, unlike the previous per-row builtin max()/min(), whose result is
    # undefined when a band has no KPI (NaN comparisons are always False).
    out["prb_diff_between_cells"] = (
        out[prb_cols].max(axis=1) - out[prb_cols].min(axis=1)
    )
    # "Yes" when the spread exceeds the threshold (NaN spread compares False -> "No").
    out["load_balance_required"] = np.where(
        out["prb_diff_between_cells"] > prb_diff_between_cells_threshold, "Yes", "No"
    )
    # Band to add next if a new layer is required (mapping maintained in
    # utils.kpi_analysis_utils.LteCapacity).
    out["next_band"] = out["site_config_band"].map(LteCapacity.next_band_mapping)
    # Congestion comment:
    #   no congested cells, no down cells     -> "No Congestion"
    #   no congested cells, some down cells   -> "No congestion but Down cell"
    #   congested cells and down cells        -> "Congestion but Colocated Down Cell"
    #   congested cells, all cells reporting  -> "Need Action"
    congested = out["num_congested_cells"] > 0
    has_down = out["num_down_or_no_kpi_cells"] > 0
    out["congestion_comment"] = np.select(
        [~congested & ~has_down, ~congested & has_down, congested & has_down],
        [
            "No Congestion",
            "No congestion but Down cell",
            "Congestion but Colocated Down Cell",
        ],
        default="Need Action",
    )
    # Action for sectors needing attention: tune load balancing first when
    # the load is unbalanced ("Yes"), otherwise add the next capacity layer.
    # (The original comment claimed "Yes" for both branches; the second
    # branch is the load_balance_required == "No" case.)
    need_action = out["congestion_comment"] == "Need Action"
    out["actions"] = np.select(
        [
            need_action & (out["load_balance_required"] == "Yes"),
            need_action & (out["load_balance_required"] == "No"),
        ],
        ["Load Balancing parameter tuning required", "Add Layer"],
        default=out["congestion_comment"],
    )
    # Final comment: expand "Add Layer" with the concrete band to add.
    out["final_comments"] = np.where(
        out["actions"] == "Add Layer",
        "Add " + out["next_band"].astype(str),
        out["actions"],
    )
    # Sector name: first non-empty cell name, preferring the lower bands.
    sector = out[name_cols[0]]
    for col in name_cols[1:]:
        sector = sector.combine_first(out[col])
    out["sector"] = sector
    # Drop rows for which no cell (and hence no sector) exists; .copy()
    # avoids chained-assignment warnings on the assignments below.
    out = out[out["sector"].notna()].copy()
    # Sector id 1/2/3 inferred from the cell naming convention ("_<id>_").
    out["sector_id"] = np.select(
        [
            out["sector"].str.contains("_1_"),
            out["sector"].str.contains("_2_"),
            out["sector"].str.contains("_3_"),
        ],
        [1, 2, 3],
        default=np.nan,
    )
    # code_sector = "<code>_<sector_id>"; sector_id is float ("1.0"), so the
    # ".0" is stripped. regex=False fixes the old call where the unescaped
    # "." acted as a regex wildcard and could corrupt codes containing
    # "<any char>0" on pandas versions defaulting to regex=True.
    out["code_sector"] = (
        out["code"].astype(str) + "_" + out["sector_id"].astype(str)
    ).str.replace(".0", "", regex=False)
    return out
def dfs_per_band_cell(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape per-cell KPI rows into one row per (site, sector) with band columns.

    For each sector group (1, 2, 3) every band's KPI columns are left-merged
    onto the unique site base data with a band suffix, e.g.
    ``avg_prb_usage_bh_l800``; sites without a given band keep NaN there.

    Args:
        df: Per-cell DataFrame with the site columns plus ``LNCEL_name``
            and the KPI columns (``avg_prb_usage_bh`` ...).

    Returns:
        Concatenation of the three per-sector DataFrames, one row per
        (site, sector group), with a fresh RangeIndex.
    """
    site_columns = ["code", "Region", "site_config_band", "Longitude", "Latitude"]
    base_sites = df[site_columns].drop_duplicates()

    # KPI columns copied per band. "avg_prb_usage_bh_2nd" keeps its "_2nd"
    # marker at the end: avg_prb_usage_bh_<band>_2nd.
    kpi_columns = [
        "avg_prb_usage_bh",
        "avg_prb_usage_bh_2nd",
        "avg_act_ues",
        "avg_dl_thp",
        "avg_ul_thp",
    ]
    # (LNCEL_name substring template, output column suffix) per band; "{g}"
    # is the sector group id. The "S" variant is the second L1800 carrier.
    band_templates = [
        ("_{g}_L800", "l800"),
        ("_{g}_L1800", "l1800"),
        ("_{g}_L2300", "l2300"),
        ("_{g}_L2600", "l2600"),
        ("_{g}S_L1800", "l1800s"),
    ]

    sector_frames = []
    for group in ("1", "2", "3"):
        sector_df = base_sites.copy()
        for template, suffix in band_templates:
            pattern = template.format(g=group)
            # regex=False: the pattern is a literal substring.
            # na=False: a missing LNCEL_name must yield False, not NaN —
            # a NaN in the boolean mask would make the filtering fail.
            band_rows = df[
                df["LNCEL_name"].str.contains(pattern, regex=False, na=False)
            ]
            # Rename up-front so the merge never produces _x/_y suffixes.
            renames = {"LNCEL_name": f"LNCEL_name_{suffix}"}
            for kpi in kpi_columns:
                if kpi.endswith("_2nd"):
                    renames[kpi] = f"{kpi[:-4]}_{suffix}_2nd"
                else:
                    renames[kpi] = f"{kpi}_{suffix}"
            band_df = band_rows[["code", "LNCEL_name", *kpi_columns]].rename(
                columns=renames
            )
            # Left merge keeps sites that lack this band (NaN KPI columns).
            sector_df = pd.merge(sector_df, band_df, on="code", how="left")
        sector_frames.append(sector_df)

    return pd.concat(sector_frames, axis=0, ignore_index=True)
def lte_database_for_capacity(dump_path: str):
    """Build a minimal LTE cell database (FDD + TDD) from a network dump.

    Args:
        dump_path: Path to the network dump processed by ``process_lte_data``.

    Returns:
        DataFrame with one row per cell, restricted to
        ``LTE_DATABASE_COLUMNS`` and with the cell name column renamed from
        ``final_name`` to ``LNCEL_name``.
    """
    processed = process_lte_data(dump_path)
    fdd_df = processed[0][LTE_DATABASE_COLUMNS]
    tdd_df = processed[1][LTE_DATABASE_COLUMNS]
    combined = pd.concat([fdd_df, tdd_df], axis=0)
    # "final_name" is the cell identifier used for joining with the KPI data.
    return combined.rename(columns={"final_name": "LNCEL_name"})
def lte_bh_dfs_per_kpi(
    dump_path: str,
    df: pd.DataFrame,
    number_of_kpi_days: int = 7,
    availability_threshold: int = 95,
    prb_usage_threshold: int = 80,
    prb_diff_between_cells_threshold: int = 20,
    number_of_threshold_days: int = 3,
    main_prb_to_use: str = "",
) -> pd.DataFrame:
    """Build the per-cell BH KPI table and the per-sector capacity analysis.

    Args:
        dump_path: Path to the network dump (used for the cell database).
        df: Cleaned BH KPI rows (columns as in ``KPI_COLUMNS``).
        number_of_kpi_days: Number of most recent days used in the analysis.
        availability_threshold: Minimum required cell availability.
        prb_usage_threshold: PRB usage level considered congested.
        prb_diff_between_cells_threshold: Max allowed PRB spread per sector.
        number_of_threshold_days: Days that must exceed the threshold.
        main_prb_to_use: Name of the primary PRB counter; the non-primary
            counter's columns get the "_2nd" suffix.

    Returns:
        ``[bh_kpi_df, lte_analysis_df]`` — a list, despite the declared
        ``pd.DataFrame`` annotation (kept for backward compatibility).
    """
    pivoted_kpi_dfs = create_dfs_per_kpi(
        df=df,
        pivot_date_column="date",
        pivot_name_column="LNCEL_name",
        kpi_columns_from=2,
    )
    cell_availability_df = cell_availability_analysis(
        df=pivoted_kpi_dfs["Cell_Avail_excl_BLU"],
        days=number_of_kpi_days,
        availability_threshold=availability_threshold,
    )
    prb_usage_df = analyze_prb_usage(
        df=pivoted_kpi_dfs["E_UTRAN_Avg_PRB_usage_per_TTI_DL"],
        number_of_kpi_days=number_of_kpi_days,
        prb_usage_threshold=prb_usage_threshold,
        analysis_type="BH",
        number_of_threshold_days=number_of_threshold_days,
        suffix="" if main_prb_to_use == "E-UTRAN Avg PRB usage per TTI DL" else "_2nd",
    )
    prb_lev10_usage_df = analyze_prb_usage(
        df=pivoted_kpi_dfs["DL_PRB_Util_p_TTI_Lev_10"],
        number_of_kpi_days=number_of_kpi_days,
        prb_usage_threshold=prb_usage_threshold,
        analysis_type="BH",
        number_of_threshold_days=number_of_threshold_days,
        suffix="" if main_prb_to_use == "DL PRB Util p TTI Lev_10" else "_2nd",
    )

    def _with_max_avg(kpi_df: pd.DataFrame, name: str) -> pd.DataFrame:
        # Add max_<name> / avg_<name> computed from the original per-date
        # columns only. Fixes a bug where the average was computed AFTER
        # appending the max column, so the max value was included in (and
        # skewed) the average.
        value_columns = list(kpi_df.columns)
        kpi_df[f"max_{name}"] = kpi_df[value_columns].max(axis=1)
        kpi_df[f"avg_{name}"] = kpi_df[value_columns].mean(axis=1)
        return kpi_df

    act_ues_df = _with_max_avg(pivoted_kpi_dfs["Avg_act_UEs_DL"], "act_ues")
    dl_thp_df = _with_max_avg(pivoted_kpi_dfs["Avg_PDCP_cell_thp_DL"], "dl_thp")
    ul_thp_df = _with_max_avg(pivoted_kpi_dfs["Avg_PDCP_cell_thp_UL"], "ul_thp")

    bh_kpi_df = pd.concat(
        [
            cell_availability_df,
            prb_lev10_usage_df,
            prb_usage_df,
            act_ues_df,
            dl_thp_df,
            ul_thp_df,
        ],
        axis=1,
    )
    bh_kpi_df = bh_kpi_df.reset_index()
    prb_df = bh_kpi_df[PRB_COLUMNS]
    # Drop placeholder rows (empty names or short artifacts like "1").
    prb_df = prb_df[prb_df["LNCEL_name"].str.len() > 3]
    # Columns carry a second level from the pivot (presumably the date —
    # TODO confirm against create_dfs_per_kpi); keep only the KPI level.
    prb_df = prb_df.droplevel(level=1, axis=1)
    lte_db = lte_database_for_capacity(dump_path)
    # Left merge: keep every cell from the dump even if it has no BH KPIs.
    db_and_prb = pd.merge(lte_db, prb_df, on="LNCEL_name", how="left")
    db_and_prb = db_and_prb[db_and_prb["LNCEL_name"].str.len() > 3]
    lte_analysis_df = dfs_per_band_cell(db_and_prb)
    lte_analysis_df = lte_analysis_logic(
        lte_analysis_df,
        prb_usage_threshold,
        prb_diff_between_cells_threshold,
    )
    lte_analysis_df = lte_analysis_df[LTE_ANALYSIS_COLUMNS]
    # Shorten column names for the final report.
    lte_analysis_df = lte_analysis_df.rename(
        columns={
            "LNCEL_name_l800": "name_l800",
            "LNCEL_name_l1800": "name_l1800",
            "LNCEL_name_l2300": "name_l2300",
            "LNCEL_name_l2600": "name_l2600",
            "LNCEL_name_l1800s": "name_l1800s",
            "avg_prb_usage_bh_l800": "prb_l800",
            "avg_prb_usage_bh_l1800": "prb_l1800",
            "avg_prb_usage_bh_l2300": "prb_l2300",
            "avg_prb_usage_bh_l2600": "prb_l2600",
            "avg_prb_usage_bh_l1800s": "prb_l1800s",
            "avg_prb_usage_bh_l800_2nd": "prb_l800_2nd",
            "avg_prb_usage_bh_l1800_2nd": "prb_l1800_2nd",
            "avg_prb_usage_bh_l2300_2nd": "prb_l2300_2nd",
            "avg_prb_usage_bh_l2600_2nd": "prb_l2600_2nd",
            "avg_prb_usage_bh_l1800s_2nd": "prb_l1800s_2nd",
            "avg_act_ues_l800": "act_ues_l800",
            "avg_act_ues_l1800": "act_ues_l1800",
            "avg_act_ues_l2300": "act_ues_l2300",
            "avg_act_ues_l2600": "act_ues_l2600",
            "avg_act_ues_l1800s": "act_ues_l1800s",
            "avg_dl_thp_l800": "dl_thp_l800",
            "avg_dl_thp_l1800": "dl_thp_l1800",
            "avg_dl_thp_l2300": "dl_thp_l2300",
            "avg_dl_thp_l2600": "dl_thp_l2600",
            "avg_dl_thp_l1800s": "dl_thp_l1800s",
            "avg_ul_thp_l800": "ul_thp_l800",
            "avg_ul_thp_l1800": "ul_thp_l1800",
            "avg_ul_thp_l2300": "ul_thp_l2300",
            "avg_ul_thp_l2600": "ul_thp_l2600",
            "avg_ul_thp_l1800s": "ul_thp_l1800s",
        }
    )
    return [bh_kpi_df, lte_analysis_df]
def process_lte_bh_report(
    dump_path: str,
    bh_report_path: str,
    num_last_days: int,
    num_threshold_days: int,
    availability_threshold: float,
    prb_usage_threshold: float,
    prb_diff_between_cells_threshold: float,
    main_prb_to_use: str,
) -> dict:
    """Process an LTE Busy Hour report and run the capacity analysis.

    Args:
        dump_path: Path to the network dump used to build the cell database.
        bh_report_path: Path to the BH report CSV file (";"-delimited).
        num_last_days: Number of most recent days used for the analysis.
        num_threshold_days: Number of days used for threshold calculation.
        availability_threshold: Minimum required cell availability.
        prb_usage_threshold: Maximum allowed PRB usage.
        prb_diff_between_cells_threshold: Maximum allowed PRB usage
            difference between co-sector cells.
        main_prb_to_use: Name of the PRB counter treated as primary.

    Returns:
        The value produced by ``lte_bh_dfs_per_kpi`` (the BH KPI table and
        the capacity analysis table).
    """
    # Clear any results left over from a previous run.
    LteCapacity.final_results = None
    # Load and normalize the raw BH report before pivoting per KPI.
    raw_kpis = pd.read_csv(bh_report_path, delimiter=";")
    raw_kpis = kpi_naming_cleaning(raw_kpis)
    raw_kpis = create_hourly_date(raw_kpis)
    raw_kpis = raw_kpis[KPI_COLUMNS]
    return lte_bh_dfs_per_kpi(
        dump_path=dump_path,
        df=raw_kpis,
        number_of_kpi_days=num_last_days,
        availability_threshold=availability_threshold,
        prb_usage_threshold=prb_usage_threshold,
        prb_diff_between_cells_threshold=prb_diff_between_cells_threshold,
        number_of_threshold_days=num_threshold_days,
        main_prb_to_use=main_prb_to_use,
    )