"""Streamlit page: global 2G / 3G / LTE traffic analysis.

Takes three per-technology traffic reports (CSV, ';'-separated), aggregates
them per site code and date, compares traffic between a "pre" and a "post"
period, and renders top-site charts plus geographic traffic maps for a
"last" period.
"""

from datetime import datetime

import pandas as pd
import plotly.express as px
import streamlit as st

from utils.convert_to_excel import convert_dfs, save_dataframe
from utils.utils_vars import get_physical_db


class TraficAnalysis:
    # Most recent "last period" slice, kept across Streamlit reruns so the
    # charts/maps below the "Run Analysis" button survive widget interactions.
    last_period_df: pd.DataFrame = None


# ------------------------------ PROCESSING ------------------------------ #


def extract_code(name):
    """Return the leading numeric site code from a BTS name, or ``None``.

    Names are expected to look like ``"<code>_<rest>"`` (spaces are first
    normalized to underscores).  Non-strings and names shorter than 10
    characters are rejected.

    Fix: the original raised ``ValueError`` when the leading token was not
    numeric; such names are now rejected with ``None`` like the other
    invalid cases.
    """
    name = name.replace(" ", "_") if isinstance(name, str) else None
    if not name or len(name) < 10:
        return None
    try:
        return int(name.split("_")[0])
    except ValueError:
        # Leading token is not a number (e.g. "SITE_ABC_...") — skip the row.
        return None


def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the raw 2G report to one row per (date, site code).

    Required columns: ``BCF name``, ``PERIOD_START_TIME``, ``TRAFFIC_PS DL``,
    ``PS_UL_Load``, ``2G_Carried Traffic``.
    """
    df = df[df["BCF name"].str.len() >= 10].copy()
    # Total 2G data traffic = PS downlink traffic + PS uplink load.
    df["2g_data_trafic"] = df["TRAFFIC_PS DL"] + df["PS_UL_Load"]
    df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True)
    df["code"] = df["BCF name"].apply(extract_code)
    # NOTE(review): format "%m.%d.%Y" assumes month-first dates — confirm
    # against the exporting tool (day-first "%d.%m.%Y" is the common variant).
    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
    df = df.groupby(["date", "ID", "code"], as_index=False)[
        ["2g_data_trafic", "2g_voice_trafic"]
    ].sum()
    return df


def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the raw 3G report to one row per (date, site code).

    Required columns: ``WBTS name``, ``PERIOD_START_TIME``,
    ``Total CS traffic - Erl``, ``Total_Data_Traffic``.
    """
    df = df[df["WBTS name"].str.len() >= 10].copy()
    df["code"] = df["WBTS name"].apply(extract_code)
    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
    df.rename(
        columns={
            "Total CS traffic - Erl": "3g_voice_trafic",
            "Total_Data_Traffic": "3g_data_trafic",
        },
        inplace=True,
    )
    df = df.groupby(["date", "ID", "code"], as_index=False)[
        ["3g_voice_trafic", "3g_data_trafic"]
    ].sum()
    return df


def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
    """Aggregate the raw LTE report to one row per (date, site code).

    Required columns: ``LNBTS name``, ``PERIOD_START_TIME``,
    ``4G/LTE DL Traffic Volume (GBytes)``, ``4G/LTE UL Traffic Volume (GBytes)``.
    """
    df = df[df["LNBTS name"].str.len() >= 10].copy()
    # LTE carries no separate voice metric here; data = DL + UL volume.
    df["lte_data_trafic"] = (
        df["4G/LTE DL Traffic Volume (GBytes)"]
        + df["4G/LTE UL Traffic Volume (GBytes)"]
    )
    df["code"] = df["LNBTS name"].apply(extract_code)
    df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y")
    df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str)
    df = df.groupby(["date", "ID", "code"], as_index=False)[
        ["lte_data_trafic"]
    ].sum()
    return df


# ------------------------------- ANALYSIS ------------------------------- #


def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_range):
    """Merge the per-technology aggregates and compare pre vs post periods.

    Parameters
    ----------
    df_2g, df_3g, df_lte : pd.DataFrame
        Outputs of the ``preprocess_*`` functions (keyed on date/ID/code).
    pre_range, post_range, last_period_range : sequence of two dates
        Inclusive (start, end) boundaries for each period.

    Returns
    -------
    tuple
        ``(full_df, last_period_df, pivot)`` where ``pivot`` holds per-site
        pre/post totals, absolute differences and percentage deltas.
    """
    # Load the physical site database to attach coordinates and city names.
    physical_db = get_physical_db()
    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
    physical_db["code"] = (
        pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
    )
    physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
    physical_db = physical_db.drop_duplicates(subset="code")

    # Outer joins keep sites present in only one technology.
    df = pd.merge(df_2g, df_3g, on=["date", "ID", "code"], how="outer")
    df = pd.merge(df, df_lte, on=["date", "ID", "code"], how="outer")

    for col in [
        "2g_data_trafic",
        "2g_voice_trafic",
        "3g_voice_trafic",
        "3g_data_trafic",
        "lte_data_trafic",
    ]:
        if col not in df:
            df[col] = 0
    df.fillna(0, inplace=True)

    df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
    df["total_data_trafic"] = (
        df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
    )
    df = pd.merge(df, physical_db, on=["code"], how="left")

    # Assign each row to a period based on its date.
    pre_start, pre_end = pd.to_datetime(pre_range[0]), pd.to_datetime(pre_range[1])
    post_start, post_end = pd.to_datetime(post_range[0]), pd.to_datetime(post_range[1])
    last_period_start, last_period_end = pd.to_datetime(
        last_period_range[0]
    ), pd.to_datetime(last_period_range[1])

    last_period = df[
        (df["date"] >= last_period_start) & (df["date"] <= last_period_end)
    ]

    def assign_period(date):
        if pre_start <= date <= pre_end:
            return "pre"
        elif post_start <= date <= post_end:
            return "post"
        else:
            return "other"

    df["period"] = df["date"].apply(assign_period)
    comparison = df[df["period"].isin(["pre", "post"])]

    pivot = (
        comparison.groupby(["code", "period"])[
            ["total_voice_trafic", "total_data_trafic"]
        ]
        .sum()
        .unstack()
    )
    pivot.columns = [f"{metric}_{period}" for metric, period in pivot.columns]
    pivot = pivot.reset_index()

    # Absolute pre/post differences.
    pivot["total_voice_trafic_diff"] = (
        pivot["total_voice_trafic_post"] - pivot["total_voice_trafic_pre"]
    )
    pivot["total_data_trafic_diff"] = (
        pivot["total_data_trafic_post"] - pivot["total_data_trafic_pre"]
    )
    # Percentage deltas; ``.get`` defaults cover a period with no data at all.
    # NOTE(review): a site with a pre total of exactly 0 yields +/-inf here.
    for metric in ["total_voice_trafic", "total_data_trafic"]:
        pivot[f"{metric}_diff_pct"] = (
            (pivot.get(f"{metric}_post", 0) - pivot.get(f"{metric}_pre", 0))
            / pivot.get(f"{metric}_pre", 1)
        ) * 100

    return df, last_period, pivot.round(2)


def _scaled_sizes(values: pd.Series, min_size: float, max_size: float) -> pd.Series:
    """Linearly rescale *values* into [min_size, max_size] for map bubbles.

    Fix: the original divided by ``max - min`` per row via ``apply``; when all
    values are equal that produced NaN/inf sizes and broke the map.  A zero
    span now yields a constant ``min_size``, and the math is vectorized.
    """
    lo, hi = values.min(), values.max()
    span = hi - lo
    if not span:
        return pd.Series(min_size, index=values.index)
    return min_size + (max_size - min_size) * (values - lo) / span


# --------------------------------- UI ----------------------------------- #

st.title("📊 Global Trafic Analysis - 2G / 3G / LTE")

doc_col, image_col = st.columns(2)
with doc_col:
    st.write(
        """
    The report analyzes 2G / 3G / LTE traffic :
    - 2G Traffic Report in CSV format (required columns : BCF name, PERIOD_START_TIME, TRAFFIC_PS DL, PS_UL_Load)
    - 3G Traffic Report in CSV format (required columns : WBTS name, PERIOD_START_TIME, Total CS traffic - Erl, Total_Data_Traffic)
    - LTE Traffic Report in CSV format (required columns : LNBTS name, PERIOD_START_TIME, 4G/LTE DL Traffic Volume (GBytes), 4G/LTE UL Traffic Volume (GBytes))
    """
    )
# with image_col:
#     st.image("./assets/trafic_analysis.png", width=250)

upload_2g_col, upload_3g_col, upload_lte_col = st.columns(3)
with upload_2g_col:
    two_g_file = st.file_uploader(
        "Upload 2G Traffic Report", type=["csv", "xls", "xlsx"]
    )
with upload_3g_col:
    three_g_file = st.file_uploader(
        "Upload 3G Traffic Report", type=["csv", "xls", "xlsx"]
    )
with upload_lte_col:
    lte_file = st.file_uploader(
        "Upload LTE Traffic Report", type=["csv", "xls", "xlsx"]
    )

pre_range_col, post_range_col = st.columns(2)
with pre_range_col:
    pre_range = st.date_input("Pre-period (from - to)", [])
with post_range_col:
    post_range = st.date_input("Post-period (from - to)", [])

last_period_range_col, number_of_top_trafic_sites_col = st.columns(2)
with last_period_range_col:
    last_period_range = st.date_input("Last period (from - to)", [])
with number_of_top_trafic_sites_col:
    number_of_top_trafic_sites = st.number_input(
        "Number of top traffic sites", value=25
    )

# Fix: the last period was not validated, so an empty selection crashed
# merge_and_compare with an IndexError deep inside the analysis.
if len(pre_range) != 2 or len(post_range) != 2 or len(last_period_range) != 2:
    st.warning("⚠️ Please select 2 dates for each period (pre and post).")
    st.stop()

if not all([two_g_file, three_g_file, lte_file]):
    st.info("Please upload all 3 reports and select the comparison periods.")
    st.stop()

if st.button("🔍 Run Analysis"):
    df_2g = pd.read_csv(two_g_file, delimiter=";")
    df_3g = pd.read_csv(three_g_file, delimiter=";")
    df_lte = pd.read_csv(lte_file, delimiter=";")

    df_2g_clean = preprocess_2g(df_2g)
    df_3g_clean = preprocess_3g(df_3g)
    df_lte_clean = preprocess_lte(df_lte)

    full_df, last_period, summary_df = merge_and_compare(
        df_2g_clean, df_3g_clean, df_lte_clean, pre_range, post_range, last_period_range
    )

    # 🔍 Display Summary
    st.success("✅ Analysis completed")
    st.subheader("📈 Summary Analysis Pre / Post")
    st.dataframe(summary_df)
    TraficAnalysis.last_period_df = last_period

#######################################################################################################
#######################################################################################################

if TraficAnalysis.last_period_df is not None:
    df = TraficAnalysis.last_period_df

    # Top sites by total data traffic during the last period.
    top_sites = (
        df.groupby(["code", "City"])["total_data_trafic"]
        .sum()
        .sort_values(ascending=False)
    )
    top_sites = top_sites.head(number_of_top_trafic_sites)

    st.subheader(f"Top {number_of_top_trafic_sites} sites by data traffic")
    chart_col, data_col = st.columns(2)
    with data_col:
        st.dataframe(top_sites.sort_values(ascending=True))

    # Horizontal bar chart labelled "City_code".
    top_sites_reset = top_sites.reset_index()
    fig = px.bar(
        top_sites_reset,
        y=top_sites_reset[["City", "code"]].agg(
            lambda x: "_".join(map(str, x)), axis=1
        ),
        x="total_data_trafic",
        title=f"Top {number_of_top_trafic_sites} sites by data traffic",
        orientation="h",
        text="total_data_trafic",
        text_auto=True,
    )
    # fig.update_layout(height=600)
    with chart_col:
        st.plotly_chart(fig)

    # Top sites by total voice traffic during the last period.
    top_sites_voice = (
        df.groupby(["code", "City"])["total_voice_trafic"]
        .sum()
        .sort_values(ascending=False)
    )
    top_sites_voice = top_sites_voice.head(number_of_top_trafic_sites)

    st.subheader(f"Top {number_of_top_trafic_sites} sites by voice traffic")
    chart_col, data_col = st.columns(2)
    with data_col:
        st.dataframe(top_sites_voice.sort_values(ascending=True))

    top_sites_voice_reset = top_sites_voice.reset_index()
    fig = px.bar(
        top_sites_voice_reset,
        y=top_sites_voice_reset[["City", "code"]].agg(
            lambda x: "_".join(map(str, x)), axis=1
        ),
        x="total_voice_trafic",
        title=f"Top {number_of_top_trafic_sites} sites by voice traffic",
        orientation="h",
        text="total_voice_trafic",
        text_auto=True,
    )
    # fig.update_layout(height=600)
    with chart_col:
        st.plotly_chart(fig)

    #####################################################
    min_size = 5
    max_size = 40

    # Map of total data traffic during the last period.
    df_data = (
        df.groupby(["code", "City", "Latitude", "Longitude"])["total_data_trafic"]
        .sum()
        .reset_index()
    )
    st.subheader("Map of data trafic during last period")

    traffic_data_min = df_data["total_data_trafic"].min()
    traffic_data_max = df_data["total_data_trafic"].max()
    df_data["bubble_size"] = _scaled_sizes(
        df_data["total_data_trafic"], min_size, max_size
    )

    # Custom color scale: visible blues ramping into strong reds.
    custom_blue_red = [
        [0.0, "#4292c6"],  # light blue
        [0.2, "#2171b5"],
        [0.4, "#084594"],  # dark blue
        [0.6, "#cb181d"],  # strong red
        [0.8, "#a50f15"],  # darker red
        [1.0, "#67000d"],  # very dark red
    ]

    fig = px.scatter_map(
        df_data,
        lat="Latitude",
        lon="Longitude",
        color="total_data_trafic",
        size="bubble_size",
        color_continuous_scale=custom_blue_red,
        size_max=max_size,
        zoom=10,
        height=600,
        title="Data traffic distribution",
        hover_data={"code": True, "total_data_trafic": True},
        hover_name="code",
        text=[str(x) for x in df_data["code"]],
    )
    fig.update_layout(
        # Fix: px.scatter_map renders on layout.map (MapLibre), so the style
        # key is map_style; the original mapbox_style targeted the unused
        # Mapbox subplot and had no effect.
        map_style="open-street-map",
        coloraxis_colorbar=dict(title="Total Data Traffic (MB)"),
        coloraxis=dict(cmin=traffic_data_min, cmax=traffic_data_max),
        font=dict(size=10, color="black"),
    )
    st.plotly_chart(fig)

    ########################################################################################
    # Map of total voice traffic during the last period.
    df_voice = (
        df.groupby(["code", "City", "Latitude", "Longitude"])["total_voice_trafic"]
        .sum()
        .reset_index()
    )
    st.subheader("Map of voice trafic during last period")

    traffic_voice_min = df_voice["total_voice_trafic"].min()
    traffic_voice_max = df_voice["total_voice_trafic"].max()
    df_voice["bubble_size"] = _scaled_sizes(
        df_voice["total_voice_trafic"], min_size, max_size
    )

    fig = px.scatter_map(
        df_voice,
        lat="Latitude",
        lon="Longitude",
        color="total_voice_trafic",
        size="bubble_size",
        color_continuous_scale=custom_blue_red,
        size_max=max_size,
        zoom=10,
        height=600,
        title="Voice traffic distribution",
        hover_data={"code": True, "total_voice_trafic": True},
        hover_name="code",
        text=[str(x) for x in df_voice["code"]],
    )
    fig.update_layout(
        map_style="open-street-map",
        coloraxis_colorbar=dict(title="Total Voice Traffic (MB)"),
        coloraxis=dict(cmin=traffic_voice_min, cmax=traffic_voice_max),
        font=dict(size=10, color="black"),
    )
    st.plotly_chart(fig)

    final_dfs = convert_dfs(
        [full_df, summary_df], ["Global_Trafic_Analysis", "Pre_Post_analysis"]
    )

    # 📥 Download button.
    st.download_button(
        on_click="ignore",
        type="primary",
        label="Download the Analysis Report",
        data=final_dfs,
        # Fix: raw datetime.now() contains ':' and spaces, which are invalid
        # in Windows filenames; use a filesystem-safe timestamp instead.
        file_name=f"Global_Trafic_Analysis_Report_{datetime.now():%Y-%m-%d_%H-%M-%S}.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )