db_query / queries /process_wcdma.py
DavMelchi's picture
clear Pandas .loc warning
ec8f588
import pandas as pd
from utils.config_band import config_band
from utils.convert_to_excel import convert_dfs, save_dataframe
from utils.extract_code import extract_code_from_mrbts
from utils.kml_creator import generate_kml_from_df
from utils.utils_vars import UtilsVars, WcdmaAnalysisData, get_physical_db
WCEL_COLUMNS = [
"ID_WBTS",
"ID_WCEL",
"RNC",
"WBTS",
"WCEL",
"site_name",
"name",
"code",
"Region",
"AdminCellState",
"CId",
"LAC",
"RAC",
"UARFCN",
"PriScrCode",
"SAC",
"maxCarrierPower",
"PtxPrimaryCPICH",
"CellRange",
"CodeTreeOptTimer",
"CodeTreeOptimisation",
"CodeTreeUsage",
"PRACHDelayRange",
"PrxOffset",
"PrxTarget",
"PrxTargetMax",
"PrxTargetPSMax",
"PrxTargetPSMaxtHSRACH",
"PtxCellMax",
"PtxOffset",
"PtxTarget",
"SmartLTELayeringEnabled",
"HSDPAFmcgIdentifier",
"NrtFmcgIdentifier",
"RtFmcgIdentifier",
"RTWithHSDPAFmcgIdentifier",
"HSDPAFmciIdentifier",
"NrtFmciIdentifier",
"RtFmciIdentifier",
"RTWithHSDPAFmciIdentifier",
"HSDPAFmcsIdentifier",
"HSPAFmcsIdentifier",
"NrtFmcsIdentifier",
"RtFmcsIdentifier",
"RTWithHSDPAFmcsIdentifier",
"RTWithHSPAFmcsIdentifier",
"Sintersearch",
"SintersearchConn",
"Sintrasearch",
"SintrasearchConn",
"Ssearch_RATConn",
"TreselectionFACH",
"TreselectionPCH",
"SectorID",
"Code_Sector",
"code_wcel",
"porteuse",
"band",
]
WBTS_COLUMNS = [
"ID_WBTS",
"site_name",
]
WNCEL_COLUMNS = [
"code_wcel",
"maxCarrierPower",
]
WCDMA_KML_COLUMNS = [
"code",
"name",
"Longitude",
"Latitude",
"Azimut",
"Hauteur",
"LAC",
"CId",
"LAC",
"UARFCN",
"PriScrCode",
"band",
]
def process_wcdma_data(file_path: str):
"""
Process data from the specified file path.
Args:
file_path (str): The path to the file.
"""
# Read the specific sheet into a DataFrame
# df_wcel = pd.read_excel(
# file_path, sheet_name="WCEL", engine="calamine", skiprows=[0]
# )
# df_wbts = pd.read_excel(
# file_path, sheet_name="WBTS", engine="calamine", skiprows=[0]
# )
# df_wncel = pd.read_excel(
# file_path, sheet_name="WNCEL", engine="calamine", skiprows=[0]
# )
dfs = pd.read_excel(
file_path,
sheet_name=["WCEL", "WBTS", "WNCEL"],
engine="calamine",
skiprows=[0],
)
# Process BTS data
df_wcel = dfs["WCEL"]
df_wcel.columns = df_wcel.columns.str.replace(r"[ ]", "", regex=True)
df_wcel["code"] = df_wcel["name"].str.split("_").str[0]
df_wcel["code"] = (
pd.to_numeric(df_wcel["code"], errors="coerce").fillna(0).astype(int)
)
df_wcel["Region"] = df_wcel["name"].str.split("_").str[1]
df_wcel["ID_WCEL"] = (
df_wcel[["RNC", "WBTS", "WCEL"]].astype(str).apply("_".join, axis=1)
)
df_wcel["ID_WBTS"] = df_wcel[["RNC", "WBTS"]].astype(str).apply("_".join, axis=1)
df_wcel["Code_Sector"] = (
df_wcel[["code", "SectorID"]].astype(str).apply("_".join, axis=1)
)
df_wcel["code_wcel"] = df_wcel[["code", "WCEL"]].astype(str).apply("_".join, axis=1)
df_wcel["Code_Sector"] = df_wcel["Code_Sector"].str.replace(".0", "")
df_wcel["porteuse"] = (
df_wcel["UARFCN"].map(UtilsVars.porteuse_mapping).fillna("not found")
)
df_wcel["band"] = df_wcel["UARFCN"].map(UtilsVars.wcdma_band).fillna("not found")
# create config_band dataframe
df_band = config_band(df_wcel)
# Process WBTS data
df_wbts = dfs["WBTS"]
df_wbts.columns = df_wbts.columns.str.replace(r"[ ]", "", regex=True)
df_wbts["ID_WBTS"] = df_wbts[["RNC", "WBTS"]].astype(str).apply("_".join, axis=1)
df_wbts.rename(columns={"name": "site_name"}, inplace=True)
df_wbts = df_wbts[WBTS_COLUMNS]
# Process WNCEL data
df_wncel = dfs["WNCEL"]
df_wncel.columns = df_wncel.columns.str.replace(r"[ ]", "", regex=True)
df_wncel["CODE"] = df_wncel["MRBTS"].apply(extract_code_from_mrbts)
df_wncel["code_wcel"] = (
df_wncel[["CODE", "WNCEL"]].astype(str).apply("_".join, axis=1)
)
df_wncel = df_wncel[WNCEL_COLUMNS]
# Merge dataframes
df_wcel_bcf = pd.merge(df_wcel, df_wbts, on="ID_WBTS", how="left")
df_3g = pd.merge(df_wcel_bcf, df_wncel, on="code_wcel", how="left")
df_3g = df_3g[WCEL_COLUMNS]
df_physical_db = get_physical_db()
df_3g = pd.merge(df_3g, df_band, on="code", how="left")
df_3g = pd.merge(df_3g, df_physical_db, on="Code_Sector", how="left")
# Save dataframes
# save_dataframe(df_wcel, "wcel")
# save_dataframe(df_wcel_bcf, "wbts")
# save_dataframe(df_wncel, "wncel")
# df_3g = save_dataframe(df_3g, "3G")
UtilsVars.all_db_dfs.append(df_3g)
UtilsVars.wcdma_dfs.append(df_3g)
UtilsVars.all_db_dfs_names.append("WCDMA")
# UtilsVars.final_wcdma_database = convert_dfs([df_3g], ["WCDMA"])
return df_3g
# UtilsVars.final_wcdma_database = [df_3g]
# BTS.process_ok = "Done"
def process_wcdma_data_to_excel(file_path: str):
"""
Process WCDMA data from the specified file path and convert it to Excel format
Args:
file_path (str): The path to the file.
"""
wcdma_dfs = process_wcdma_data(file_path)
UtilsVars.final_wcdma_database = convert_dfs([wcdma_dfs], ["WCDMA"])
############################# KML CREATION #################################
def process_wcdma_data_to_kml(file_path: str):
"""
Process WCDMA data from the specified file path and convert it to KML format
Args:
file_path (str): The path to the file.
"""
wcdma_kml_df = process_wcdma_data(file_path)
wcdma_kml_df = wcdma_kml_df[WCDMA_KML_COLUMNS]
# Add colors column base on "band" column
wcdma_kml_df["color"] = wcdma_kml_df["band"].map(UtilsVars.color_mapping)
# Add size column base on "band" column
wcdma_kml_df["size"] = wcdma_kml_df["band"].map(UtilsVars.size_mapping)
# Remove empty rows
wcdma_kml_df = wcdma_kml_df.dropna(subset=["Longitude", "Latitude", "Azimut"])
# Generate kml
UtilsVars.wcdma_kml_file = generate_kml_from_df(wcdma_kml_df)
############################ANALYTICSS AND STATISTICS############################
def wcdma_analaysis(
filepath: str,
# region_list: list
):
"""
Process WCDMA data from the specified file path and convert it to Excel format
Args:
filepath (str): The path to the file.
"""
# wcdma_df = process_wcdma_data(filepath)
wcdma_df: pd.DataFrame = UtilsVars.wcdma_dfs[0]
# filter per list of regions
# wcdma_df = wcdma_df.loc[wcdma_df["Region"].isin(region_list)]
# df to count number of site per rnc
df_site_per_rnc = wcdma_df[["RNC", "code"]]
df_site_per_rnc = df_site_per_rnc.drop_duplicates(subset=["code"], keep="first")
df_site_per_lac = wcdma_df.loc[:, ["RNC", "LAC", "code"]].copy()
df_site_per_lac.loc[:, "code_lac"] = (
df_site_per_lac["code"].astype(str) + "_" + df_site_per_lac["LAC"].astype(str)
)
df_site_per_lac = df_site_per_lac.drop_duplicates(subset=["code_lac"], keep="first")
WcdmaAnalysisData.total_number_of_rnc = wcdma_df["RNC"].nunique()
WcdmaAnalysisData.total_number_of_wcel = wcdma_df["ID_WCEL"].nunique()
WcdmaAnalysisData.number_of_site = len(wcdma_df["site_name"].dropna().unique())
WcdmaAnalysisData.number_of_site_per_rnc = df_site_per_rnc["RNC"].value_counts()
WcdmaAnalysisData.number_of_cell_per_rnc = wcdma_df["RNC"].value_counts()
WcdmaAnalysisData.number_of_empty_wbts_name = wcdma_df["site_name"].isnull().sum()
WcdmaAnalysisData.number_of_empty_wcel_name = wcdma_df["name"].isnull().sum()
WcdmaAnalysisData.wcel_administate_distribution = wcdma_df[
"AdminCellState"
].value_counts()
WcdmaAnalysisData.psc_distribution = wcdma_df["PriScrCode"].value_counts()
# Manage Cells count per LAC and RNC
# Pivot RNC and LAC
WcdmaAnalysisData.number_of_cell_per_lac = (
wcdma_df.groupby(["RNC", "LAC"]).size().reset_index(name="count")
)
# Rename columns
WcdmaAnalysisData.number_of_cell_per_lac = (
WcdmaAnalysisData.number_of_cell_per_lac.rename(
columns={"RNC": "RNC", "LAC": "LAC", "count": "LAC_Count"}
)
)
# Add "RNC_" and "LAC_" prefix
WcdmaAnalysisData.number_of_cell_per_lac["RNC"] = (
"RNC_" + WcdmaAnalysisData.number_of_cell_per_lac["RNC"].astype(str)
)
WcdmaAnalysisData.number_of_cell_per_lac["LAC"] = (
"LAC_" + WcdmaAnalysisData.number_of_cell_per_lac["LAC"].astype(str)
)
##################### Number of site per LAC
WcdmaAnalysisData.number_of_site_per_lac = (
df_site_per_lac.groupby(["RNC", "LAC"]).size().reset_index(name="count")
)
# Rename columns
WcdmaAnalysisData.number_of_site_per_lac = (
WcdmaAnalysisData.number_of_site_per_lac.rename(
columns={"RNC": "RNC", "LAC": "LAC", "count": "Site_Count"}
)
)
# Add "RNC_" and "LAC_" prefix
WcdmaAnalysisData.number_of_site_per_lac["RNC"] = (
"RNC_" + WcdmaAnalysisData.number_of_site_per_lac["RNC"].astype(str)
)
WcdmaAnalysisData.number_of_site_per_lac["LAC"] = (
"LAC_" + WcdmaAnalysisData.number_of_site_per_lac["LAC"].astype(str)
)