import pandas as pd from utils.utils_vars import SiteAnalysisData, UtilsVars GSM_COLUMNS = [ "code", "site_name", "site_config_band", "Region", "number_trx_per_site", "Longitude", "Latitude", "Hauteur", "City", "Adresse", "Commune", "Cercle", ] WCDMA_COLUMNS = [ "code", "site_name", "Region", "site_config_band", "Longitude", "Latitude", "Hauteur", "City", "Adresse", "Commune", "Cercle", ] LTE_COLUMNS = [ "code", "lnbts_name", "site_config_band", "Region", "Longitude", "Latitude", "Hauteur", "City", "Adresse", "Commune", "Cercle", ] CODE_COLUMNS = [ "code", "Region", "Longitude", "Latitude", "Hauteur", "City", "Adresse", "Commune", "Cercle", ] def clean_bands(bands): if pd.isna(bands): return None parts = [p for p in bands.split("/") if p != "nan"] return "/".join(parts) if parts else None def site_db(): gsm_df: pd.DataFrame = UtilsVars.all_db_dfs[0] wcdma_df: pd.DataFrame = UtilsVars.all_db_dfs[3] lte_fdd_df: pd.DataFrame = UtilsVars.all_db_dfs[4] lte_tdd_df: pd.DataFrame = UtilsVars.all_db_dfs[5] gsm_df = gsm_df[GSM_COLUMNS] gsm_df = gsm_df.rename( columns={ "code": "code", "site_name": "gsm_name", "site_config_band": "2G_Bands", } ) gsm_df.drop_duplicates(subset=["code"], keep="first", inplace=True) wcdma_df = wcdma_df[WCDMA_COLUMNS] wcdma_df = wcdma_df.rename( columns={ "code": "code", "site_name": "wcdma_name", "site_config_band": "3G_Bands", } ) wcdma_df.drop_duplicates(subset=["code"], keep="first", inplace=True) lte_fdd_df = lte_fdd_df[LTE_COLUMNS] lte_tdd_df = lte_tdd_df[LTE_COLUMNS] lte_df: pd.DataFrame = pd.concat([lte_fdd_df, lte_tdd_df], ignore_index=False) lte_df = lte_df.rename( columns={ "code": "code", "lnbts_name": "lte_name", "site_config_band": "4G_Bands", } ) lte_df.drop_duplicates(subset=["code"], keep="first", inplace=True) ################################# CODE DATAFRAME############################# gsm_code_df: pd.DataFrame = ( gsm_df[CODE_COLUMNS].copy() if gsm_df is not None else pd.DataFrame() ) wcdma_code_df: pd.DataFrame = ( wcdma_df[CODE_COLUMNS].copy() if wcdma_df is not None else pd.DataFrame() ) lte_code_df: pd.DataFrame = ( lte_df[CODE_COLUMNS].copy() if lte_df is not None else pd.DataFrame() ) code_df: pd.DataFrame = pd.concat( [gsm_code_df, wcdma_code_df, lte_code_df], ignore_index=True ) code_df.drop_duplicates(subset=["code"], keep="first", inplace=True) code_df.dropna(subset=["code"], inplace=True) # order by code code_df.sort_values(by=["code"], inplace=True) # print(code_df) # ################################# SITE DATAFRAME############################# gsm_df_final = gsm_df[ [ "code", "gsm_name", "2G_Bands", "number_trx_per_site", ] ].copy() wcdma_df_final = wcdma_df[["code", "wcdma_name", "3G_Bands"]].copy() lte_df_final = lte_df[["code", "lte_name", "4G_Bands"]].copy() site_df = pd.merge(code_df, gsm_df_final, how="left", on="code") site_df = pd.merge(site_df, wcdma_df_final, how="left", on="code") site_df = pd.merge(site_df, lte_df_final, how="left", on="code") # order by code site_df["site_name"] = ( site_df["gsm_name"].fillna(site_df["wcdma_name"]).fillna(site_df["lte_name"]) ) site_df["all_bands"] = ( (site_df[["2G_Bands", "3G_Bands", "4G_Bands"]]) .astype(str) .apply("/".join, axis=1) ) site_df["all_bands"] = site_df["all_bands"].apply(clean_bands) site_df = site_df[ [ "code", "site_name", "Region", "2G_Bands", "3G_Bands", "4G_Bands", "all_bands", "number_trx_per_site", "Longitude", "Latitude", "Hauteur", "City", "Adresse", "Commune", "Cercle", ] ] site_df.sort_values(by=["code"], inplace=True) UtilsVars.all_db_dfs.append(site_df) UtilsVars.all_db_dfs_names.append("SITE") ####################### SITE ANALYSIS ################################################### SiteAnalysisData.total_number_of_site = len(site_df["code"].unique()) SiteAnalysisData.total_munber_of_gsm_site = site_df["2G_Bands"].notna().sum() SiteAnalysisData.total_number_of_wcdma_site = site_df["3G_Bands"].notna().sum() SiteAnalysisData.total_number_of_lte_site = site_df["4G_Bands"].notna().sum() SiteAnalysisData.gsm_bands_distribution = site_df["2G_Bands"].value_counts( ascending=True ) SiteAnalysisData.wcdma_bands_distribution = site_df["3G_Bands"].value_counts( ascending=True ) SiteAnalysisData.lte_bands_distribution = site_df["4G_Bands"].value_counts( ascending=True ) SiteAnalysisData.all_bands_distribution = site_df["all_bands"].value_counts( ascending=True ) SiteAnalysisData.number_of_trx_per_site_distribution = site_df[ "number_trx_per_site" ].value_counts()