Spaces:

DavMelchi
/

db_query

Running

App Files Files Community

db_query / process_kpi /process_wcel_capacity.py

DavMelchi

Add City to DataBase

002ceab 2 months ago

raw

history blame contribute delete

12.8 kB

	import pandas as pd

	from utils.kpi_analysis_utils import (
	analyze_fails_kpi,
	cell_availability_analysis,
	combine_comments,
	create_daily_date,
	create_dfs_per_kpi,
	kpi_naming_cleaning,
	summarize_fails_comments,
	)
	from utils.utils_vars import get_physical_db

	tx_comments_mapping = {
	"iub_frameloss exceeded threshold": "iub frameloss",
	"iub_frameloss exceeded threshold, hsdpa_congestion_rate_iub exceeded threshold": "iub frameloss and hsdpa iub congestion",
	"hsdpa_congestion_rate_iub exceeded threshold": "hsdpa iub congestion",
	}
	operational_comments_mapping = {
	"Down Site": "Down Cell",
	"iub frameloss, instability": "Availability and TX issues",
	"iub frameloss and hsdpa iub congestion, Availability OK": "TX issues",
	"iub frameloss, Availability OK": "TX issues",
	"critical instability": "Availability issues",
	"iub frameloss, critical instability": "Availability and TX issues",
	"iub frameloss and hsdpa iub congestion, instability": "Availability and TX issues",
	"Availability OK": "Site OK",
	"hsdpa iub congestion, instability": "Availability and TX issues",
	"instability": "Availability issues",
	"hsdpa iub congestion, Availability OK": "TX issues",
	"iub frameloss and hsdpa iub congestion, critical instability": "Availability and TX issues",
	"hsdpa iub congestion, critical instability": "Availability and TX issues",
	}

	fails_comments_mapping = {
	"ac, ac_dl, bts, code fails": "Power, Bts and Code fails",
	"bts fails": "Bts fails",
	"ac, bts, code fails": "Power and Code fails",
	"ac, code fails": "Power fails",
	"ac fails": "Power fails",
	"ac, ac_dl fails": "Power fails",
	"ac, bts fails": "Power and Bts fails",
	"ac, ac_dl, bts fails": "Power and Bts fails",
	"ac, ac_dl, code fails": "Power and Code fails",
	"ac, ac_ul, bts, code fails": "Power, Bts and Code fails",
	"ac, ac_dl, ac_ul, bts, code fails": "Power, Bts and Code fails",
	}

	KPI_COLUMNS = [
	"WCEL_name",
	"date",
	"Cell_Availability_excluding_blocked_by_user_state_BLU",
	"Total_CS_traffic_Erl",
	"HSDPA_TRAFFIC_VOLUME",
	"HSDPA_USER_THROUGHPUT",
	"Max_simult_HSDPA_users",
	"IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71",
	"HSDPA_congestion_rate_in_Iub",
	"rrc_conn_stp_fail_ac_M1001C3",
	"RRC_CONN_STP_FAIL_AC_UL_M1001C731",
	"RRC_CONN_STP_FAIL_AC_DL_M1001C732",
	"RRC_CONN_STP_FAIL_AC_COD_M1001C733",
	"rrc_conn_stp_fail_bts_M1001C4",
	]

	WCEL_ANALYSIS_COLUMNS = [
	"WCEL_name",
	"Average_cell_availability_daily",
	"number_of_days_exceeding_availability_threshold_daily",
	"availability_comment_daily",
	"sum_traffic_cs",
	"sum_traffic_dl",
	"max_dl_throughput",
	"avg_dl_throughput",
	"max_users",
	"max_iub_frameloss",
	"number_of_days_with_iub_frameloss_exceeded",
	"max_hsdpa_congestion_rate_iub",
	"number_of_days_with_hsdpa_congestion_rate_iub_exceeded",
	"max_rrc_fail_ac",
	"number_of_days_with_rrc_fail_ac_exceeded",
	"max_rrc_fail_ac_ul",
	"number_of_days_with_rrc_fail_ac_ul_exceeded",
	"max_rrc_fail_ac_dl",
	"number_of_days_with_rrc_fail_ac_dl_exceeded",
	"max_rrc_fail_code",
	"number_of_days_with_rrc_fail_code_exceeded",
	"max_rrc_fail_bts",
	"number_of_days_with_rrc_fail_bts_exceeded",
	"tx_congestion_comments",
	"operational_comments",
	"fails_comments",
	"final_comments",
	]


	class WcelCapacity:
	final_results: pd.DataFrame = None


	def wcel_kpi_analysis(
	df: pd.DataFrame,
	num_last_days: int,
	num_threshold_days: int,
	availability_threshold: int,
	iub_frameloss_threshold: int,
	hsdpa_congestion_rate_iub_threshold: int,
	fails_treshold: int,
	) -> pd.DataFrame:
	pivoted_kpi_dfs = create_dfs_per_kpi(
	df=df,
	pivot_date_column="date",
	pivot_name_column="WCEL_name",
	kpi_columns_from=2,
	)
	cell_availability_df = cell_availability_analysis(
	df=pivoted_kpi_dfs["Cell_Availability_excluding_blocked_by_user_state_BLU"],
	days=num_last_days,
	availability_threshold=availability_threshold,
	)

	# Trafics, throughput and max users
	trafic_cs_df = pivoted_kpi_dfs["Total_CS_traffic_Erl"]
	hsdpa_traffic_df = pivoted_kpi_dfs["HSDPA_TRAFFIC_VOLUME"]
	hsdpa_user_throughput_df = pivoted_kpi_dfs["HSDPA_USER_THROUGHPUT"]
	max_simult_hsdpa_users_df = pivoted_kpi_dfs["Max_simult_HSDPA_users"]
	# Add Max of Trafics, throughput and max users
	trafic_cs_df["sum_traffic_cs"] = trafic_cs_df.sum(axis=1)
	hsdpa_traffic_df["sum_traffic_dl"] = hsdpa_traffic_df.sum(axis=1)
	hsdpa_user_throughput_df["max_dl_throughput"] = hsdpa_user_throughput_df.max(axis=1)
	max_simult_hsdpa_users_df["max_users"] = max_simult_hsdpa_users_df.max(axis=1)
	# add average of Trafics, throughput and max users
	hsdpa_user_throughput_df["avg_dl_throughput"] = hsdpa_user_throughput_df.mean(
	axis=1
	)
	max_simult_hsdpa_users_df["avg_users"] = max_simult_hsdpa_users_df.mean(axis=1)

	# TX Congestion
	iub_frameloss_df = pivoted_kpi_dfs["IUB_LOSS_CC_FRAME_LOSS_IND_M1022C71"]
	hsdpa_congestion_rate_iub_df = pivoted_kpi_dfs["HSDPA_congestion_rate_in_Iub"]

	iub_frameloss_df = analyze_fails_kpi(
	df=iub_frameloss_df,
	number_of_kpi_days=num_last_days,
	number_of_threshold_days=num_threshold_days,
	kpi_threshold=iub_frameloss_threshold,
	kpi_column_name="iub_frameloss",
	)
	hsdpa_congestion_rate_iub_df = analyze_fails_kpi(
	df=hsdpa_congestion_rate_iub_df,
	number_of_kpi_days=num_last_days,
	number_of_threshold_days=num_threshold_days,
	kpi_threshold=hsdpa_congestion_rate_iub_threshold,
	kpi_column_name="hsdpa_congestion_rate_iub",
	)

	# Fails
	rrc_conn_stp_fail_ac_df = analyze_fails_kpi(
	df=pivoted_kpi_dfs["rrc_conn_stp_fail_ac_M1001C3"],
	number_of_kpi_days=num_last_days,
	number_of_threshold_days=num_threshold_days,
	kpi_threshold=fails_treshold,
	kpi_column_name="rrc_fail_ac",
	)
	rrc_conn_stp_fail_ac_ul_df = analyze_fails_kpi(
	df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_UL_M1001C731"],
	number_of_kpi_days=num_last_days,
	number_of_threshold_days=num_threshold_days,
	kpi_threshold=fails_treshold,
	kpi_column_name="rrc_fail_ac_ul",
	)
	rrc_conn_stp_fail_ac_dl_df = analyze_fails_kpi(
	df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_DL_M1001C732"],
	number_of_kpi_days=num_last_days,
	number_of_threshold_days=num_threshold_days,
	kpi_threshold=fails_treshold,
	kpi_column_name="rrc_fail_ac_dl",
	)
	rrc_conn_stp_fail_ac_cod_df = analyze_fails_kpi(
	df=pivoted_kpi_dfs["RRC_CONN_STP_FAIL_AC_COD_M1001C733"],
	number_of_kpi_days=num_last_days,
	number_of_threshold_days=num_threshold_days,
	kpi_threshold=fails_treshold,
	kpi_column_name="rrc_fail_code",
	)
	rrc_conn_stp_fail_bts_df = analyze_fails_kpi(
	df=pivoted_kpi_dfs["rrc_conn_stp_fail_bts_M1001C4"],
	number_of_kpi_days=num_last_days,
	number_of_threshold_days=num_threshold_days,
	kpi_threshold=fails_treshold,
	kpi_column_name="rrc_fail_bts",
	)

	kpi_df = pd.concat(
	[
	cell_availability_df,
	trafic_cs_df,
	hsdpa_traffic_df,
	hsdpa_user_throughput_df,
	max_simult_hsdpa_users_df,
	iub_frameloss_df,
	hsdpa_congestion_rate_iub_df,
	rrc_conn_stp_fail_ac_df,
	rrc_conn_stp_fail_ac_ul_df,
	rrc_conn_stp_fail_ac_dl_df,
	rrc_conn_stp_fail_ac_cod_df,
	rrc_conn_stp_fail_bts_df,
	],
	axis=1,
	)
	kpi_df = kpi_df.reset_index()

	kpi_df = combine_comments(
	kpi_df,
	"iub_frameloss_comment",
	"hsdpa_congestion_rate_iub_comment",
	new_column="tx_congestion_comments",
	)
	kpi_df["tx_congestion_comments"] = kpi_df["tx_congestion_comments"].apply(
	lambda x: tx_comments_mapping.get(x, x)
	)

	kpi_df = combine_comments(
	kpi_df,
	"tx_congestion_comments",
	"availability_comment_daily",
	new_column="operational_comments",
	)
	kpi_df["operational_comments"] = kpi_df["operational_comments"].apply(
	lambda x: operational_comments_mapping.get(x, x)
	)
	kpi_df = combine_comments(
	kpi_df,
	"rrc_fail_ac_comment",
	"rrc_fail_ac_ul_comment",
	"rrc_fail_ac_dl_comment",
	"rrc_fail_code_comment",
	"rrc_fail_bts_comment",
	new_column="fails_comments",
	)
	kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(summarize_fails_comments)
	kpi_df["fails_comments"] = kpi_df["fails_comments"].apply(
	lambda x: fails_comments_mapping.get(x, x)
	)
	kpi_df = combine_comments(
	kpi_df,
	"operational_comments",
	"fails_comments",
	new_column="final_comments",
	)

	wcel_analysis_df = kpi_df[WCEL_ANALYSIS_COLUMNS]
	wcel_analysis_df = wcel_analysis_df.droplevel(level=1, axis=1)

	# Rename
	wcel_analysis_df = wcel_analysis_df.rename(
	columns={
	"WCEL_name": "name",
	"Average_cell_availability_daily": "Avg_availability",
	"number_of_days_exceeding_availability_threshold_daily": "Avail_exceed_days",
	"availability_comment_daily": "availability_comment",
	"number_of_days_with_iub_frameloss_exceeded": "iub_frameloss_exceed_days",
	"number_of_days_with_hsdpa_congestion_rate_iub_exceeded": "hsdpa_iub_exceed_days",
	"number_of_days_with_rrc_fail_ac_exceeded": "ac_fail_exceed_days",
	"number_of_days_with_rrc_fail_ac_ul_exceeded": "ac_ul_fail_exceed_days",
	"number_of_days_with_rrc_fail_ac_dl_exceeded": "ac_dl_fail_exceed_days",
	"number_of_days_with_rrc_fail_code_exceeded": "code_fail_exceed_days",
	"number_of_days_with_rrc_fail_bts_exceeded": "bts_fail_exceed_days",
	}
	)
	# remove row if name less than 5 characters
	wcel_analysis_df = wcel_analysis_df[wcel_analysis_df["name"].str.len() >= 5]

	wcel_analysis_df["code"] = wcel_analysis_df["name"].str.split("_").str[0]
	wcel_analysis_df["code"] = (
	pd.to_numeric(wcel_analysis_df["code"], errors="coerce").fillna(0).astype(int)
	)
	wcel_analysis_df["Region"] = wcel_analysis_df["name"].str.split("_").str[1]
	# move code to the first column
	wcel_analysis_df = wcel_analysis_df[
	["code", "Region"]
	+ [col for col in wcel_analysis_df if col != "code" and col != "Region"]
	]

	# Load physical database
	physical_db: pd.DataFrame = get_physical_db()

	# Convert code_sector to code
	physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
	# remove duplicates
	physical_db = physical_db.drop_duplicates(subset="code")

	# keep only code and longitude and latitude
	physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]

	physical_db["code"] = (
	pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
	)

	wcel_analysis_df = pd.merge(
	wcel_analysis_df,
	physical_db,
	on="code",
	how="left",
	)

	return [wcel_analysis_df, kpi_df]


	def load_and_process_wcel_capacity_data(
	uploaded_file: pd.DataFrame,
	num_last_days: int,
	num_threshold_days: int,
	availability_threshold: int,
	iub_frameloss_threshold: int,
	hsdpa_congestion_rate_iub_threshold: int,
	fails_treshold: int,
	) -> pd.DataFrame:
	"""
	Load and process data for WCEL capacity analysis.

	Args:
	uploaded_file: Uploaded CSV file containing WCEL capacity data
	num_last_days: Number of days for analysis
	num_threshold_days: Minimum days above threshold to flag for upgrade
	availability_threshold: Utilization threshold percentage for flagging
	iub_frameloss_threshold: Utilization threshold percentage for flagging
	hsdpa_congestion_rate_iub_threshold: Utilization threshold percentage for flagging
	fails_treshold: Utilization threshold percentage for flagging

	Returns:
	Processed DataFrame with WCEL capacity analysis results
	"""
	# Load data
	df = pd.read_csv(uploaded_file, delimiter=";")
	df = kpi_naming_cleaning(df)
	df = create_daily_date(df)
	df = df[KPI_COLUMNS]
	dfs = wcel_kpi_analysis(
	df,
	num_last_days,
	num_threshold_days,
	availability_threshold,
	iub_frameloss_threshold,
	hsdpa_congestion_rate_iub_threshold,
	fails_treshold,
	)
	return dfs