db_query / queries /process_neighbors.py
DavMelchi's picture
Handle dump headers on first or second row
7efc151
import pandas as pd
from geopy.distance import geodesic
from queries.process_gsm import process_gsm_data
from queries.process_wcdma import process_wcdma_data
from utils.convert_to_excel import convert_dfs, save_dataframe
from utils.dump_excel import read_dump_excel
from utils.utils_vars import UtilsVars
ADCE_INITIAL_COLUMNS = [
"ID_BTS",
"lac_id",
"synchronized",
]
ADJS_INITIAL_COLUMNS = [
"ID_WCEL",
"lac_id",
]
BTS_SOURCE = [
"ID_BTS",
"ID_BCF",
"name",
"BCCH",
"BSIC",
"Longitude",
"Latitude",
]
BTS_TARGET = [
"lac_id",
"ID_BCF",
"name",
"BCCH",
"BSIC",
"Longitude",
"Latitude",
]
WCEL_SOURCE = [
"ID_WCEL",
"name",
"Longitude",
"Latitude",
]
WCEL_TARGET = [
"lac_id",
"name",
"Longitude",
"Latitude",
]
def _join_cols(df: pd.DataFrame, columns: list[str], sep: str = "_") -> pd.Series:
joined = df[columns[0]].astype(str)
for col in columns[1:]:
joined = joined.str.cat(df[col].astype(str), sep=sep)
return joined
def _geodesic_distance_km(
df: pd.DataFrame,
source_lat_col: str,
source_lon_col: str,
target_lat_col: str,
target_lon_col: str,
) -> list[float]:
return [
geodesic((src_lat, src_lon), (tgt_lat, tgt_lon)).kilometers
for src_lat, src_lon, tgt_lat, tgt_lon in zip(
df[source_lat_col],
df[source_lon_col],
df[target_lat_col],
df[target_lon_col],
)
]
def check_symmetry(df: pd.DataFrame):
"""
Check for symmetric relationships in a dataframe of network neighbors.
For each source-target pair, checks if the reverse target-source pair exists.
Args:
df: pandas DataFrame with columns 'SOURCE_NAME' and 'TARGET_NAME'
Returns:
DataFrame with added 'SYMETRIQUE' column ('YES' if symmetric, 'NO' otherwise)
"""
# Create a set of all (source, target) pairs for fast lookup
pairs = set(zip(df["SOURCE_NAME"], df["TARGET_NAME"]))
# Check for each row if the reverse relationship exists
df["SYMETRIQUE"] = [
"YES" if (target_name, source_name) in pairs else "NO"
for source_name, target_name in zip(df["SOURCE_NAME"], df["TARGET_NAME"])
]
return df
def process_neighbors_data(file_path: str):
"""
Process data from the specified file path.
Args:
file_path (str): The path to the file.
"""
# Read the specific sheet into a DataFrame
dfs = read_dump_excel(
file_path,
sheet_name=["ADCE", "ADJS", "ADJI", "ADJG", "ADJW", "BTS", "WCEL"],
expected_columns=["BSC", "BCF", "BTS", "adjacentCellIdLac"],
)
# # Process ADCE data
df_adce = dfs["ADCE"]
df_adce.columns = df_adce.columns.str.replace(r"[ ]", "", regex=True)
df_adce["ID_BTS"] = _join_cols(df_adce, ["BSC", "BCF", "BTS"])
df_adce["lac_id"] = _join_cols(df_adce, ["adjacentCellIdLac", "adjacentCellIdCI"])
df_adce["lac_id"] = df_adce["lac_id"].str.replace(".0", "")
df_adce = df_adce[ADCE_INITIAL_COLUMNS]
# Process BTS data
df_bts = process_gsm_data(file_path)
df_bts["lac_id"] = _join_cols(df_bts, ["locationAreaIdLAC", "cellId"]).str.replace(
".0", ""
)
df_bts_source = df_bts[BTS_SOURCE]
df_bts_source = df_bts_source.rename(columns={"name": "SOURCE_NAME"})
df_bts_target = df_bts[BTS_TARGET]
df_bts_target = df_bts_target.rename(columns={"name": "TARGET_NAME"})
# #create final adce
df_adce_final = pd.merge(df_adce, df_bts_source, on="ID_BTS", how="left")
# Rename SOURCELongitude and Latitude columns
df_adce_final.rename(
columns={
"Longitude": "SOURCE_Longitude",
"Latitude": "SOURCE_Latitude",
"ID_BCF": "SOURCE_ID_BCF",
"BCCH": "SOURCE_BCCH",
"BSIC": "SOURCE_BSIC",
},
inplace=True,
)
df_adce_final = pd.merge(
df_adce_final, df_bts_target, on="lac_id", how="left"
).dropna()
df_adce_final.rename(
columns={
"ID_BTS": "SOURCE_ID",
"lac_id": "TARGET_LAC_ID",
"Longitude": "TARGET_Longitude",
"Latitude": "TARGET_Latitude",
"ID_BCF": "TARGET_ID_BCF",
"BCCH": "TARGET_BCCH",
"BSIC": "TARGET_BSIC",
},
inplace=True,
)
df_adce_final = check_symmetry(df_adce_final)
# Add column "Sync Comment"
# if SOURCE_ID_BCF = TARGET_ID_BCF and synchronized = 0 the "Need synchronized" else ""
df_adce_final["Sync Comment"] = ""
df_adce_final.loc[
(df_adce_final["SOURCE_ID_BCF"] == df_adce_final["TARGET_ID_BCF"])
& (df_adce_final["synchronized"] == 0),
"Sync Comment",
] = "Need synchronized"
# Add column "Same BSIC" if SOURCE_BSIC = TARGET_BSIC THEN "Yes" else ""
df_adce_final["Same BSIC"] = ""
df_adce_final.loc[
df_adce_final["SOURCE_BSIC"] == df_adce_final["TARGET_BSIC"], "Same BSIC"
] = "Yes"
# Add column "Same BCCH" if SOURCE_BCCH = TARGET_BCCH THEN "Yes" else ""
df_adce_final["Same BCCH"] = ""
df_adce_final.loc[
df_adce_final["SOURCE_BCCH"] == df_adce_final["TARGET_BCCH"], "Same BCCH"
] = "Yes"
# create distance column
df_adce_final["distance_km"] = _geodesic_distance_km(
df_adce_final,
"SOURCE_Latitude",
"SOURCE_Longitude",
"TARGET_Latitude",
"TARGET_Longitude",
)
# create final adce
df_adce_final = df_adce_final[
[
"SOURCE_ID",
"SOURCE_NAME",
"SOURCE_Longitude",
"SOURCE_Latitude",
"TARGET_LAC_ID",
"TARGET_NAME",
"TARGET_Longitude",
"TARGET_Latitude",
"SYMETRIQUE",
"synchronized",
"Sync Comment",
"Same BSIC",
"Same BCCH",
"distance_km",
]
]
# process ADJS data
df_adjs = dfs["ADJS"]
df_adjs.columns = df_adjs.columns.str.replace(r"[ ]", "", regex=True)
df_adjs["ID_WCEL"] = _join_cols(df_adjs, ["RNC", "WBTS", "WCEL"])
df_adjs["lac_id"] = _join_cols(df_adjs, ["AdjsLAC", "AdjsCI"])
df_adjs = df_adjs[ADJS_INITIAL_COLUMNS]
# process WCEL DATA
df_wcel = process_wcdma_data(file_path)
df_wcel["ID_WCEL"] = _join_cols(df_wcel, ["RNC", "WBTS", "WCEL"])
df_wcel["lac_id"] = _join_cols(df_wcel, ["LAC", "CId"])
df_wcel = df_wcel[["ID_WCEL", "lac_id", "name", "Longitude", "Latitude"]]
df_wcel_source = df_wcel[WCEL_SOURCE]
df_wcel_source = df_wcel_source.rename(columns={"name": "SOURCE_NAME"})
df_wcel_target = df_wcel[WCEL_TARGET]
df_wcel_target = df_wcel_target.rename(columns={"name": "TARGET_NAME"})
# create final adjs
df_adjs_final = pd.merge(df_adjs, df_wcel_source, on="ID_WCEL", how="left")
df_adjs_final = pd.merge(
df_adjs_final, df_wcel_target, on="lac_id", how="left"
).dropna()
df_adjs_final.rename(
columns={"ID_WCEL": "SOURCE_ID", "lac_id": "TARGET_LAC_ID"}, inplace=True
)
df_adjs_final = check_symmetry(df_adjs_final)
# create distance column
df_adjs_final["distance_km"] = _geodesic_distance_km(
df_adjs_final,
"Latitude_x",
"Longitude_x",
"Latitude_y",
"Longitude_y",
)
# process ADJI DATA
df_adji = dfs["ADJI"]
df_adji.columns = df_adji.columns.str.replace(r"[ ]", "", regex=True)
df_adji["ID_WCEL"] = _join_cols(df_adji, ["RNC", "WBTS", "WCEL"])
df_adji["lac_id"] = _join_cols(df_adji, ["AdjiLAC", "AdjiCI"])
df_adji = df_adji[["ID_WCEL", "lac_id"]]
df_adji_final = pd.merge(df_adji, df_wcel_source, on="ID_WCEL", how="left")
df_adji_final = pd.merge(
df_adji_final, df_wcel_target, on="lac_id", how="left"
).dropna()
df_adji_final.rename(
columns={"ID_WCEL": "SOURCE_ID", "lac_id": "TARGET_LAC_ID"}, inplace=True
)
df_adji_final = check_symmetry(df_adji_final)
# create distance column
df_adji_final["distance_km"] = _geodesic_distance_km(
df_adji_final,
"Latitude_x",
"Longitude_x",
"Latitude_y",
"Longitude_y",
)
# process ADJG DATA
df_adjg = dfs["ADJG"]
df_adjg.columns = df_adjg.columns.str.replace(r"[ ]", "", regex=True)
df_adjg["ID_WCEL"] = _join_cols(df_adjg, ["RNC", "WBTS", "WCEL"])
df_adjg["lac_id"] = _join_cols(df_adjg, ["AdjgLAC", "AdjgCI"])
df_adjg = df_adjg[["ID_WCEL", "lac_id"]]
df_adjg_final = pd.merge(df_adjg, df_wcel_source, on="ID_WCEL", how="left")
df_adjg_final = pd.merge(
df_adjg_final, df_bts_target, on="lac_id", how="left"
).dropna()
df_adjg_final.rename(
columns={"ID_WCEL": "SOURCE_ID", "lac_id": "TARGET_LAC_ID"}, inplace=True
)
# create distance column
df_adjg_final["distance_km"] = _geodesic_distance_km(
df_adjg_final,
"Latitude_x",
"Longitude_x",
"Latitude_y",
"Longitude_y",
)
# process ADJW DATA
df_adjw = dfs["ADJW"]
df_adjw.columns = df_adjw.columns.str.replace(r"[ ]", "", regex=True)
df_adjw["ID_BTS"] = _join_cols(df_adjw, ["BSC", "BCF", "BTS"])
df_adjw["lac_id"] = _join_cols(df_adjw, ["lac", "AdjwCId"])
df_adjw = df_adjw[["ID_BTS", "lac_id"]]
df_adjw_final = pd.merge(df_adjw, df_bts_source, on="ID_BTS", how="left")
df_adjw_final = pd.merge(
df_adjw_final, df_wcel_target, on="lac_id", how="left"
).dropna()
df_adjw_final.rename(
columns={"ID_BTS": "SOURCE_ID", "lac_id": "TARGET_LAC_ID"}, inplace=True
)
df_adjw_final = check_symmetry(df_adjw_final)
# create distance column
df_adjw_final["distance_km"] = _geodesic_distance_km(
df_adjw_final,
"Latitude_x",
"Longitude_x",
"Latitude_y",
"Longitude_y",
)
# save_dataframe(df_adjw_final, "ADJW")
return [df_adjw_final, df_adjg_final, df_adji_final, df_adjs_final, df_adce_final]
def process_neighbors_data_to_excel(file_path: str):
neighbors_dfs = process_neighbors_data(file_path)
UtilsVars.neighbors_database = convert_dfs(
neighbors_dfs, ["ADJW", "ADJG", "ADJI", "ADJS", "ADCE"]
)