|
from datetime import datetime |
|
|
|
import pandas as pd |
|
import plotly.express as px |
|
import streamlit as st |
|
|
|
from utils.convert_to_excel import convert_dfs, save_dataframe |
|
from utils.utils_vars import get_physical_db |
|
|
|
|
|
class TraficAnalysis: |
|
last_period_df: pd.DataFrame = None |
|
|
|
|
|
|
|
def extract_code(name): |
|
name = name.replace(" ", "_") if isinstance(name, str) else None |
|
return int(name.split("_")[0]) if name and len(name) >= 10 else None |
|
|
|
|
|
def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame: |
|
df = df[df["BCF name"].str.len() >= 10].copy() |
|
df["2g_data_trafic"] = df["TRAFFIC_PS DL"] + df["PS_UL_Load"] |
|
df.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"}, inplace=True) |
|
df["code"] = df["BCF name"].apply(extract_code) |
|
df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y") |
|
df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str) |
|
df = df.groupby(["date", "ID", "code"], as_index=False)[ |
|
["2g_data_trafic", "2g_voice_trafic"] |
|
].sum() |
|
return df |
|
|
|
|
|
def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame: |
|
df = df[df["WBTS name"].str.len() >= 10].copy() |
|
df["code"] = df["WBTS name"].apply(extract_code) |
|
df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y") |
|
df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str) |
|
df.rename( |
|
columns={ |
|
"Total CS traffic - Erl": "3g_voice_trafic", |
|
"Total_Data_Traffic": "3g_data_trafic", |
|
}, |
|
inplace=True, |
|
) |
|
df = df.groupby(["date", "ID", "code"], as_index=False)[ |
|
["3g_voice_trafic", "3g_data_trafic"] |
|
].sum() |
|
return df |
|
|
|
|
|
def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame: |
|
df = df[df["LNBTS name"].str.len() >= 10].copy() |
|
df["lte_data_trafic"] = ( |
|
df["4G/LTE DL Traffic Volume (GBytes)"] |
|
+ df["4G/LTE UL Traffic Volume (GBytes)"] |
|
) |
|
df["code"] = df["LNBTS name"].apply(extract_code) |
|
df["date"] = pd.to_datetime(df["PERIOD_START_TIME"], format="%m.%d.%Y") |
|
df["ID"] = df["date"].astype(str) + "_" + df["code"].astype(str) |
|
df = df.groupby(["date", "ID", "code"], as_index=False)[["lte_data_trafic"]].sum() |
|
return df |
|
|
|
|
|
|
|
def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_range): |
|
|
|
|
|
physical_db = get_physical_db() |
|
physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0] |
|
physical_db["code"] = ( |
|
pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int) |
|
) |
|
physical_db = physical_db[["code", "Longitude", "Latitude", "City"]] |
|
physical_db = physical_db.drop_duplicates(subset="code") |
|
|
|
df = pd.merge(df_2g, df_3g, on=["date", "ID", "code"], how="outer") |
|
df = pd.merge(df, df_lte, on=["date", "ID", "code"], how="outer") |
|
|
|
|
|
for col in [ |
|
"2g_data_trafic", |
|
"2g_voice_trafic", |
|
"3g_voice_trafic", |
|
"3g_data_trafic", |
|
"lte_data_trafic", |
|
]: |
|
if col not in df: |
|
df[col] = 0 |
|
|
|
df.fillna(0, inplace=True) |
|
|
|
df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"] |
|
df["total_data_trafic"] = ( |
|
df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"] |
|
) |
|
df = pd.merge(df, physical_db, on=["code"], how="left") |
|
|
|
|
|
pre_start, pre_end = pd.to_datetime(pre_range[0]), pd.to_datetime(pre_range[1]) |
|
post_start, post_end = pd.to_datetime(post_range[0]), pd.to_datetime(post_range[1]) |
|
last_period_start, last_period_end = pd.to_datetime( |
|
last_period_range[0] |
|
), pd.to_datetime(last_period_range[1]) |
|
|
|
last_period = df[ |
|
(df["date"] >= last_period_start) & (df["date"] <= last_period_end) |
|
] |
|
|
|
def assign_period(date): |
|
if pre_start <= date <= pre_end: |
|
return "pre" |
|
elif post_start <= date <= post_end: |
|
return "post" |
|
else: |
|
return "other" |
|
|
|
df["period"] = df["date"].apply(assign_period) |
|
|
|
comparison = df[df["period"].isin(["pre", "post"])] |
|
|
|
pivot = ( |
|
comparison.groupby(["code", "period"])[ |
|
["total_voice_trafic", "total_data_trafic"] |
|
] |
|
.sum() |
|
.unstack() |
|
) |
|
pivot.columns = [f"{metric}_{period}" for metric, period in pivot.columns] |
|
pivot = pivot.reset_index() |
|
|
|
|
|
pivot["total_voice_trafic_diff"] = ( |
|
pivot["total_voice_trafic_post"] - pivot["total_voice_trafic_pre"] |
|
) |
|
pivot["total_data_trafic_diff"] = ( |
|
pivot["total_data_trafic_post"] - pivot["total_data_trafic_pre"] |
|
) |
|
|
|
for metric in ["total_voice_trafic", "total_data_trafic"]: |
|
pivot[f"{metric}_diff_pct"] = ( |
|
(pivot.get(f"{metric}_post", 0) - pivot.get(f"{metric}_pre", 0)) |
|
/ pivot.get(f"{metric}_pre", 1) |
|
) * 100 |
|
return df, last_period, pivot.round(2) |
|
|
|
|
|
|
|
st.title("📊 Global Trafic Analysis - 2G / 3G / LTE") |
|
doc_col, image_col = st.columns(2) |
|
|
|
with doc_col: |
|
st.write( |
|
""" |
|
The report analyzes 2G / 3G / LTE traffic : |
|
- 2G Traffic Report in CSV format (required columns : BCF name, PERIOD_START_TIME, TRAFFIC_PS DL, PS_UL_Load) |
|
- 3G Traffic Report in CSV format (required columns : WBTS name, PERIOD_START_TIME, Total CS traffic - Erl, Total_Data_Traffic) |
|
- LTE Traffic Report in CSV format (required columns : LNBTS name, PERIOD_START_TIME, 4G/LTE DL Traffic Volume (GBytes), 4G/LTE UL Traffic Volume (GBytes)) |
|
""" |
|
) |
|
|
|
|
|
|
|
|
|
|
|
upload_2g_col, upload_3g_col, upload_lte_col = st.columns(3) |
|
with upload_2g_col: |
|
two_g_file = st.file_uploader( |
|
"Upload 2G Traffic Report", type=["csv", "xls", "xlsx"] |
|
) |
|
with upload_3g_col: |
|
three_g_file = st.file_uploader( |
|
"Upload 3G Traffic Report", type=["csv", "xls", "xlsx"] |
|
) |
|
with upload_lte_col: |
|
lte_file = st.file_uploader( |
|
"Upload LTE Traffic Report", type=["csv", "xls", "xlsx"] |
|
) |
|
|
|
pre_range_col, post_range_col = st.columns(2) |
|
with pre_range_col: |
|
pre_range = st.date_input("Pre-period (from - to)", []) |
|
with post_range_col: |
|
post_range = st.date_input("Post-period (from - to)", []) |
|
|
|
last_period_range_col, number_of_top_trafic_sites_col = st.columns(2) |
|
with last_period_range_col: |
|
last_period_range = st.date_input("Last period (from - to)", []) |
|
with number_of_top_trafic_sites_col: |
|
number_of_top_trafic_sites = st.number_input( |
|
"Number of top traffic sites", value=25 |
|
) |
|
|
|
if len(pre_range) != 2 or len(post_range) != 2: |
|
st.warning("⚠️ Please select 2 dates for each period (pre and post).") |
|
st.stop() |
|
if not all([two_g_file, three_g_file, lte_file]): |
|
st.info("Please upload all 3 reports and select the comparison periods.") |
|
st.stop() |
|
|
|
if st.button("🔍 Run Analysis"): |
|
|
|
df_2g = pd.read_csv(two_g_file, delimiter=";") |
|
df_3g = pd.read_csv(three_g_file, delimiter=";") |
|
df_lte = pd.read_csv(lte_file, delimiter=";") |
|
|
|
df_2g_clean = preprocess_2g(df_2g) |
|
df_3g_clean = preprocess_3g(df_3g) |
|
df_lte_clean = preprocess_lte(df_lte) |
|
|
|
full_df, last_period, summary_df = merge_and_compare( |
|
df_2g_clean, df_3g_clean, df_lte_clean, pre_range, post_range, last_period_range |
|
) |
|
|
|
|
|
st.success("✅ Analysis completed") |
|
st.subheader("📈 Summary Analysis Pre / Post") |
|
st.dataframe(summary_df) |
|
TraficAnalysis.last_period_df = last_period |
|
|
|
|
|
|
|
|
|
if TraficAnalysis.last_period_df is not None: |
|
|
|
df = TraficAnalysis.last_period_df |
|
|
|
top_sites = ( |
|
df.groupby(["code", "City"])["total_data_trafic"] |
|
.sum() |
|
.sort_values(ascending=False) |
|
) |
|
top_sites = top_sites.head(number_of_top_trafic_sites) |
|
|
|
st.subheader(f"Top {number_of_top_trafic_sites} sites by data traffic") |
|
chart_col, data_col = st.columns(2) |
|
with data_col: |
|
st.dataframe(top_sites.sort_values(ascending=True)) |
|
|
|
fig = px.bar( |
|
top_sites.reset_index(), |
|
y=top_sites.reset_index()[["City", "code"]].agg( |
|
lambda x: "_".join(map(str, x)), axis=1 |
|
), |
|
x="total_data_trafic", |
|
title=f"Top {number_of_top_trafic_sites} sites by data traffic", |
|
orientation="h", |
|
text="total_data_trafic", |
|
text_auto=True, |
|
) |
|
|
|
with chart_col: |
|
st.plotly_chart(fig) |
|
|
|
|
|
top_sites_voice = ( |
|
df.groupby(["code", "City"])["total_voice_trafic"] |
|
.sum() |
|
.sort_values(ascending=False) |
|
) |
|
top_sites_voice = top_sites_voice.head(number_of_top_trafic_sites) |
|
|
|
st.subheader(f"Top {number_of_top_trafic_sites} sites by voice traffic") |
|
chart_col, data_col = st.columns(2) |
|
with data_col: |
|
st.dataframe(top_sites_voice.sort_values(ascending=True)) |
|
|
|
fig = px.bar( |
|
top_sites_voice.reset_index(), |
|
y=top_sites_voice.reset_index()[["City", "code"]].agg( |
|
lambda x: "_".join(map(str, x)), axis=1 |
|
), |
|
x="total_voice_trafic", |
|
title=f"Top {number_of_top_trafic_sites} sites by voice traffic", |
|
orientation="h", |
|
text="total_voice_trafic", |
|
text_auto=True, |
|
) |
|
|
|
with chart_col: |
|
st.plotly_chart(fig) |
|
|
|
|
|
min_size = 5 |
|
max_size = 40 |
|
|
|
|
|
df_data = ( |
|
df.groupby(["code", "City", "Latitude", "Longitude"])["total_data_trafic"] |
|
.sum() |
|
.reset_index() |
|
) |
|
|
|
st.subheader("Map of data trafic during last period") |
|
|
|
|
|
|
|
|
|
traffic_data_min = df_data["total_data_trafic"].min() |
|
traffic_data_max = df_data["total_data_trafic"].max() |
|
df_data["bubble_size"] = df_data["total_data_trafic"].apply( |
|
lambda x: min_size |
|
+ (max_size - min_size) |
|
* (x - traffic_data_min) |
|
/ (traffic_data_max - traffic_data_min) |
|
) |
|
|
|
|
|
custom_blue_red = [ |
|
[0.0, "#4292c6"], |
|
[0.2, "#2171b5"], |
|
[0.4, "#084594"], |
|
[0.6, "#cb181d"], |
|
[0.8, "#a50f15"], |
|
[1.0, "#67000d"], |
|
] |
|
|
|
fig = px.scatter_map( |
|
df_data, |
|
lat="Latitude", |
|
lon="Longitude", |
|
color="total_data_trafic", |
|
size="bubble_size", |
|
color_continuous_scale=custom_blue_red, |
|
size_max=max_size, |
|
zoom=10, |
|
height=600, |
|
title="Data traffic distribution", |
|
hover_data={"code": True, "total_data_trafic": True}, |
|
hover_name="code", |
|
text=[str(x) for x in df_data["code"]], |
|
) |
|
|
|
fig.update_layout( |
|
mapbox_style="open-street-map", |
|
coloraxis_colorbar=dict(title="Total Data Traffic (MB)"), |
|
coloraxis=dict(cmin=traffic_data_min, cmax=traffic_data_max), |
|
font=dict(size=10, color="black"), |
|
) |
|
|
|
st.plotly_chart(fig) |
|
|
|
|
|
|
|
|
|
df_voice = ( |
|
df.groupby(["code", "City", "Latitude", "Longitude"])["total_voice_trafic"] |
|
.sum() |
|
.reset_index() |
|
) |
|
st.subheader("Map of voice trafic during last period") |
|
|
|
|
|
traffic_voice_min = df_voice["total_voice_trafic"].min() |
|
traffic_voice_max = df_voice["total_voice_trafic"].max() |
|
df_voice["bubble_size"] = df_voice["total_voice_trafic"].apply( |
|
lambda x: min_size |
|
+ (max_size - min_size) |
|
* (x - traffic_voice_min) |
|
/ (traffic_voice_max - traffic_voice_min) |
|
) |
|
|
|
fig = px.scatter_map( |
|
df_voice, |
|
lat="Latitude", |
|
lon="Longitude", |
|
color="total_voice_trafic", |
|
size="bubble_size", |
|
color_continuous_scale=custom_blue_red, |
|
size_max=max_size, |
|
zoom=10, |
|
height=600, |
|
title="Voice traffic distribution", |
|
hover_data={"code": True, "total_voice_trafic": True}, |
|
hover_name="code", |
|
text=[str(x) for x in df_voice["code"]], |
|
) |
|
|
|
fig.update_layout( |
|
mapbox_style="open-street-map", |
|
coloraxis_colorbar=dict(title="Total Voice Traffic (MB)"), |
|
coloraxis=dict(cmin=traffic_voice_min, cmax=traffic_voice_max), |
|
font=dict(size=10, color="black"), |
|
) |
|
|
|
st.plotly_chart(fig) |
|
|
|
final_dfs = convert_dfs( |
|
[full_df, summary_df], ["Global_Trafic_Analysis", "Pre_Post_analysis"] |
|
) |
|
|
|
st.download_button( |
|
on_click="ignore", |
|
type="primary", |
|
label="Download the Analysis Report", |
|
data=final_dfs, |
|
file_name=f"Global_Trafic_Analysis_Report_{datetime.now()}.xlsx", |
|
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", |
|
) |
|
|