db_query / queries /process_neighbors.py
DavMelchi's picture
improve neighb report size
c03a5ca
import pandas as pd
from geopy.distance import geodesic
from queries.process_gsm import process_gsm_data
from queries.process_wcdma import process_wcdma_data
from utils.convert_to_excel import convert_dfs, save_dataframe
from utils.utils_vars import UtilsVars
ADCE_INITIAL_COLUMNS = [
"ID_BTS",
"lac_id",
"synchronized",
]
ADJS_INITIAL_COLUMNS = [
"ID_WCEL",
"lac_id",
]
BTS_SOURCE = [
"ID_BTS",
"ID_BCF",
"name",
"BCCH",
"BSIC",
"Longitude",
"Latitude",
]
BTS_TARGET = [
"lac_id",
"ID_BCF",
"name",
"BCCH",
"BSIC",
"Longitude",
"Latitude",
]
WCEL_SOURCE = [
"ID_WCEL",
"name",
"Longitude",
"Latitude",
]
WCEL_TARGET = [
"lac_id",
"name",
"Longitude",
"Latitude",
]
def check_symmetry(df: pd.DataFrame):
"""
Check for symmetric relationships in a dataframe of network neighbors.
For each source-target pair, checks if the reverse target-source pair exists.
Args:
df: pandas DataFrame with columns 'SOURCE_NAME' and 'TARGET_NAME'
Returns:
DataFrame with added 'SYMETRIQUE' column ('YES' if symmetric, 'NO' otherwise)
"""
# Create a set of all (source, target) pairs for fast lookup
pairs = set(zip(df["SOURCE_NAME"], df["TARGET_NAME"]))
# Check for each row if the reverse relationship exists
df["SYMETRIQUE"] = df.apply(
lambda row: (
"YES" if (row["TARGET_NAME"], row["SOURCE_NAME"]) in pairs else "NO"
),
axis=1,
)
return df
def process_neighbors_data(file_path: str):
"""
Process data from the specified file path.
Args:
file_path (str): The path to the file.
"""
# Read the specific sheet into a DataFrame
dfs = pd.read_excel(
file_path,
sheet_name=["ADCE", "ADJS", "ADJI", "ADJG", "ADJW", "BTS", "WCEL"],
engine="calamine",
skiprows=[0],
)
# # Process ADCE data
df_adce = dfs["ADCE"]
df_adce.columns = df_adce.columns.str.replace(r"[ ]", "", regex=True)
df_adce["ID_BTS"] = (
df_adce[["BSC", "BCF", "BTS"]].astype(str).apply("_".join, axis=1)
)
df_adce["lac_id"] = (
df_adce[["adjacentCellIdLac", "adjacentCellIdCI"]]
.astype(str)
.apply("_".join, axis=1)
)
df_adce["lac_id"] = df_adce["lac_id"].str.replace(".0", "")
df_adce = df_adce[ADCE_INITIAL_COLUMNS]
# Process BTS data
df_bts = process_gsm_data(file_path)
df_bts["lac_id"] = (
df_bts[["locationAreaIdLAC", "cellId"]]
.astype(str)
.apply("_".join, axis=1)
.str.replace(".0", "")
)
df_bts_source = df_bts[BTS_SOURCE]
df_bts_source = df_bts_source.rename(columns={"name": "SOURCE_NAME"})
df_bts_target = df_bts[BTS_TARGET]
df_bts_target = df_bts_target.rename(columns={"name": "TARGET_NAME"})
# #create final adce
df_adce_final = pd.merge(df_adce, df_bts_source, on="ID_BTS", how="left")
# Rename SOURCELongitude and Latitude columns
df_adce_final.rename(
columns={
"Longitude": "SOURCE_Longitude",
"Latitude": "SOURCE_Latitude",
"ID_BCF": "SOURCE_ID_BCF",
"BCCH": "SOURCE_BCCH",
"BSIC": "SOURCE_BSIC",
},
inplace=True,
)
df_adce_final = pd.merge(
df_adce_final, df_bts_target, on="lac_id", how="left"
).dropna()
df_adce_final.rename(
columns={
"ID_BTS": "SOURCE_ID",
"lac_id": "TARGET_LAC_ID",
"Longitude": "TARGET_Longitude",
"Latitude": "TARGET_Latitude",
"ID_BCF": "TARGET_ID_BCF",
"BCCH": "TARGET_BCCH",
"BSIC": "TARGET_BSIC",
},
inplace=True,
)
df_adce_final = check_symmetry(df_adce_final)
# Add column "Sync Comment"
# if SOURCE_ID_BCF = TARGET_ID_BCF and synchronized = 0 the "Need synchronized" else ""
df_adce_final["Sync Comment"] = df_adce_final.apply(
lambda row: (
"Need synchronized"
if row["SOURCE_ID_BCF"] == row["TARGET_ID_BCF"] and row["synchronized"] == 0
else ""
),
axis=1,
)
# Add column "Same BSIC" if SOURCE_BSIC = TARGET_BSIC THEN "Yes" else ""
df_adce_final["Same BSIC"] = df_adce_final.apply(
lambda row: "Yes" if row["SOURCE_BSIC"] == row["TARGET_BSIC"] else "",
axis=1,
)
# Add column "Same BCCH" if SOURCE_BCCH = TARGET_BCCH THEN "Yes" else ""
df_adce_final["Same BCCH"] = df_adce_final.apply(
lambda row: "Yes" if row["SOURCE_BCCH"] == row["TARGET_BCCH"] else "",
axis=1,
)
# create distance column
df_adce_final["distance_km"] = df_adce_final.apply(
lambda row: geodesic(
(row["SOURCE_Latitude"], row["SOURCE_Longitude"]),
(row["TARGET_Latitude"], row["TARGET_Longitude"]),
).kilometers,
axis=1,
)
# create final adce
df_adce_final = df_adce_final[
[
"SOURCE_ID",
"SOURCE_NAME",
"SOURCE_Longitude",
"SOURCE_Latitude",
"TARGET_LAC_ID",
"TARGET_NAME",
"TARGET_Longitude",
"TARGET_Latitude",
"SYMETRIQUE",
"synchronized",
"Sync Comment",
"Same BSIC",
"Same BCCH",
"distance_km",
]
]
# process ADJS data
df_adjs = dfs["ADJS"]
df_adjs.columns = df_adjs.columns.str.replace(r"[ ]", "", regex=True)
df_adjs["ID_WCEL"] = (
df_adjs[["RNC", "WBTS", "WCEL"]].astype(str).apply("_".join, axis=1)
)
df_adjs["lac_id"] = (
df_adjs[["AdjsLAC", "AdjsCI"]].astype(str).apply("_".join, axis=1)
)
df_adjs = df_adjs[ADJS_INITIAL_COLUMNS]
# process WCEL DATA
df_wcel = process_wcdma_data(file_path)
df_wcel["ID_WCEL"] = (
df_wcel[["RNC", "WBTS", "WCEL"]].astype(str).apply("_".join, axis=1)
)
df_wcel["lac_id"] = df_wcel[["LAC", "CId"]].astype(str).apply("_".join, axis=1)
df_wcel = df_wcel[["ID_WCEL", "lac_id", "name", "Longitude", "Latitude"]]
df_wcel_source = df_wcel[WCEL_SOURCE]
df_wcel_source = df_wcel_source.rename(columns={"name": "SOURCE_NAME"})
df_wcel_target = df_wcel[WCEL_TARGET]
df_wcel_target = df_wcel_target.rename(columns={"name": "TARGET_NAME"})
# create final adjs
df_adjs_final = pd.merge(df_adjs, df_wcel_source, on="ID_WCEL", how="left")
df_adjs_final = pd.merge(
df_adjs_final, df_wcel_target, on="lac_id", how="left"
).dropna()
df_adjs_final.rename(
columns={"ID_WCEL": "SOURCE_ID", "lac_id": "TARGET_LAC_ID"}, inplace=True
)
df_adjs_final = check_symmetry(df_adjs_final)
# create distance column
df_adjs_final["distance_km"] = df_adjs_final.apply(
lambda row: geodesic(
(row["Latitude_x"], row["Longitude_x"]),
(row["Latitude_y"], row["Longitude_y"]),
).kilometers,
axis=1,
)
# process ADJI DATA
df_adji = dfs["ADJI"]
df_adji.columns = df_adji.columns.str.replace(r"[ ]", "", regex=True)
df_adji["ID_WCEL"] = (
df_adji[["RNC", "WBTS", "WCEL"]].astype(str).apply("_".join, axis=1)
)
df_adji["lac_id"] = (
df_adji[["AdjiLAC", "AdjiCI"]].astype(str).apply("_".join, axis=1)
)
df_adji = df_adji[["ID_WCEL", "lac_id"]]
df_adji_final = pd.merge(df_adji, df_wcel_source, on="ID_WCEL", how="left")
df_adji_final = pd.merge(
df_adji_final, df_wcel_target, on="lac_id", how="left"
).dropna()
df_adji_final.rename(
columns={"ID_WCEL": "SOURCE_ID", "lac_id": "TARGET_LAC_ID"}, inplace=True
)
df_adji_final = check_symmetry(df_adji_final)
# create distance column
df_adji_final["distance_km"] = df_adji_final.apply(
lambda row: geodesic(
(row["Latitude_x"], row["Longitude_x"]),
(row["Latitude_y"], row["Longitude_y"]),
).kilometers,
axis=1,
)
# process ADJG DATA
df_adjg = dfs["ADJG"]
df_adjg.columns = df_adjg.columns.str.replace(r"[ ]", "", regex=True)
df_adjg["ID_WCEL"] = (
df_adjg[["RNC", "WBTS", "WCEL"]].astype(str).apply("_".join, axis=1)
)
df_adjg["lac_id"] = (
df_adjg[["AdjgLAC", "AdjgCI"]].astype(str).apply("_".join, axis=1)
)
df_adjg = df_adjg[["ID_WCEL", "lac_id"]]
df_adjg_final = pd.merge(df_adjg, df_wcel_source, on="ID_WCEL", how="left")
df_adjg_final = pd.merge(
df_adjg_final, df_bts_target, on="lac_id", how="left"
).dropna()
df_adjg_final.rename(
columns={"ID_WCEL": "SOURCE_ID", "lac_id": "TARGET_LAC_ID"}, inplace=True
)
# create distance column
df_adjg_final["distance_km"] = df_adjg_final.apply(
lambda row: geodesic(
(row["Latitude_x"], row["Longitude_x"]),
(row["Latitude_y"], row["Longitude_y"]),
).kilometers,
axis=1,
)
# process ADJW DATA
df_adjw = dfs["ADJW"]
df_adjw.columns = df_adjw.columns.str.replace(r"[ ]", "", regex=True)
df_adjw["ID_BTS"] = (
df_adjw[["BSC", "BCF", "BTS"]].astype(str).apply("_".join, axis=1)
)
df_adjw["lac_id"] = df_adjw[["lac", "AdjwCId"]].astype(str).apply("_".join, axis=1)
df_adjw = df_adjw[["ID_BTS", "lac_id"]]
df_adjw_final = pd.merge(df_adjw, df_bts_source, on="ID_BTS", how="left")
df_adjw_final = pd.merge(
df_adjw_final, df_wcel_target, on="lac_id", how="left"
).dropna()
df_adjw_final.rename(
columns={"ID_BTS": "SOURCE_ID", "lac_id": "TARGET_LAC_ID"}, inplace=True
)
df_adjw_final = check_symmetry(df_adjw_final)
# create distance column
df_adjw_final["distance_km"] = df_adjw_final.apply(
lambda row: geodesic(
(row["Latitude_x"], row["Longitude_x"]),
(row["Latitude_y"], row["Longitude_y"]),
).kilometers,
axis=1,
)
# save_dataframe(df_adjw_final, "ADJW")
return [df_adjw_final, df_adjg_final, df_adji_final, df_adjs_final, df_adce_final]
def process_neighbors_data_to_excel(file_path: str):
neighbors_dfs = process_neighbors_data(file_path)
UtilsVars.neighbors_database = convert_dfs(
neighbors_dfs, ["ADJW", "ADJG", "ADJI", "ADJS", "ADCE"]
)