# db_query/apps/kpi_analysis/trafic_analysis.py
# Author: DavMelchi — "Adding global trafic analysis" (commit 9d2b604)
from datetime import datetime
from typing import ClassVar, Optional

import pandas as pd
import plotly.express as px
import streamlit as st

from utils.convert_to_excel import convert_dfs, save_dataframe
from utils.utils_vars import get_physical_db
class TraficAnalysis:
    """Holds state shared between the analysis run and the display section.

    Streamlit re-executes this script top-to-bottom on every interaction;
    the "Run Analysis" branch parks the last-period slice here so the
    display section further down can render it in the same run.
    """

    # None until "Run Analysis" has been executed; the original annotation
    # claimed a plain pd.DataFrame, which the None default contradicted.
    last_period_df: ClassVar[Optional[pd.DataFrame]] = None
############### PROCESSING ###############
def extract_code(name):
    """Extract the leading numeric site code from an equipment name.

    Names look like "12345_SITE_NAME"; spaces are normalized to
    underscores first.  Returns the integer code, or None when the input
    is not a string, is shorter than 10 characters, or does not start
    with a numeric prefix.
    """
    if not isinstance(name, str):
        return None
    name = name.replace(" ", "_")
    if len(name) < 10:
        return None
    prefix = name.split("_")[0]
    # Guard against non-numeric prefixes: the original int() call raised
    # ValueError here and aborted the whole .apply() over the report.
    return int(prefix) if prefix.isdigit() else None
def preprocess_2g(df: pd.DataFrame) -> pd.DataFrame:
    """Clean a raw 2G traffic export and aggregate it per site and day.

    Rows whose "BCF name" is shorter than 10 characters are dropped, the
    numeric site code and the day are derived, then data/voice traffic is
    summed per (date, ID, code).
    """
    frame = df.loc[df["BCF name"].str.len() >= 10].copy()
    # Data traffic = downlink PS traffic plus uplink PS load.
    frame["2g_data_trafic"] = frame["TRAFFIC_PS DL"] + frame["PS_UL_Load"]
    frame = frame.rename(columns={"2G_Carried Traffic": "2g_voice_trafic"})
    frame["code"] = frame["BCF name"].apply(extract_code)
    frame["date"] = pd.to_datetime(frame["PERIOD_START_TIME"], format="%m.%d.%Y")
    # Synthetic per-day/per-site key used downstream when merging techs.
    frame["ID"] = frame["date"].astype(str) + "_" + frame["code"].astype(str)
    metrics = ["2g_data_trafic", "2g_voice_trafic"]
    return frame.groupby(["date", "ID", "code"], as_index=False)[metrics].sum()
def preprocess_3g(df: pd.DataFrame) -> pd.DataFrame:
    """Clean a raw 3G traffic export and aggregate it per site and day.

    Rows whose "WBTS name" is shorter than 10 characters are dropped, the
    numeric site code and the day are derived, then voice/data traffic is
    summed per (date, ID, code).
    """
    frame = df.loc[df["WBTS name"].str.len() >= 10].copy()
    frame["code"] = frame["WBTS name"].apply(extract_code)
    frame["date"] = pd.to_datetime(frame["PERIOD_START_TIME"], format="%m.%d.%Y")
    # Synthetic per-day/per-site key used downstream when merging techs.
    frame["ID"] = frame["date"].astype(str) + "_" + frame["code"].astype(str)
    frame = frame.rename(
        columns={
            "Total CS traffic - Erl": "3g_voice_trafic",
            "Total_Data_Traffic": "3g_data_trafic",
        }
    )
    metrics = ["3g_voice_trafic", "3g_data_trafic"]
    return frame.groupby(["date", "ID", "code"], as_index=False)[metrics].sum()
def preprocess_lte(df: pd.DataFrame) -> pd.DataFrame:
    """Clean a raw LTE traffic export and aggregate it per site and day.

    Rows whose "LNBTS name" is shorter than 10 characters are dropped,
    DL+UL volume is summed into one data-traffic metric, then aggregated
    per (date, ID, code).
    """
    frame = df.loc[df["LNBTS name"].str.len() >= 10].copy()
    downlink = frame["4G/LTE DL Traffic Volume (GBytes)"]
    uplink = frame["4G/LTE UL Traffic Volume (GBytes)"]
    frame["lte_data_trafic"] = downlink + uplink
    frame["code"] = frame["LNBTS name"].apply(extract_code)
    frame["date"] = pd.to_datetime(frame["PERIOD_START_TIME"], format="%m.%d.%Y")
    # Synthetic per-day/per-site key used downstream when merging techs.
    frame["ID"] = frame["date"].astype(str) + "_" + frame["code"].astype(str)
    grouped = frame.groupby(["date", "ID", "code"], as_index=False)
    return grouped[["lte_data_trafic"]].sum()
############################## ANALYSIS ################
def merge_and_compare(df_2g, df_3g, df_lte, pre_range, post_range, last_period_range):
    """Merge the per-technology aggregates and compare pre/post periods.

    Parameters
    ----------
    df_2g, df_3g, df_lte : pd.DataFrame
        Per-site daily aggregates produced by the preprocess_* helpers,
        keyed on (date, ID, code).
    pre_range, post_range, last_period_range : sequence
        2-item (start, end) date selections from the UI.

    Returns
    -------
    tuple
        (full merged frame with period tags, last-period slice,
        per-code pre/post pivot with diffs and percentage deltas,
        rounded to 2 decimals).
    """
    # Load physical database
    physical_db = get_physical_db()
    # Site code is the numeric prefix of "Code_Sector"; unparsable codes
    # become 0 rather than NaN so the column can stay integer.
    physical_db["code"] = physical_db["Code_Sector"].str.split("_").str[0]
    physical_db["code"] = (
        pd.to_numeric(physical_db["code"], errors="coerce").fillna(0).astype(int)
    )
    physical_db = physical_db[["code", "Longitude", "Latitude", "City"]]
    physical_db = physical_db.drop_duplicates(subset="code")
    # Outer merges keep sites/days present in only some technologies.
    df = pd.merge(df_2g, df_3g, on=["date", "ID", "code"], how="outer")
    df = pd.merge(df, df_lte, on=["date", "ID", "code"], how="outer")
    # print(df)
    # A metric column can be missing entirely if an input had no valid
    # rows; backfill with zeros so the totals below never KeyError.
    for col in [
        "2g_data_trafic",
        "2g_voice_trafic",
        "3g_voice_trafic",
        "3g_data_trafic",
        "lte_data_trafic",
    ]:
        if col not in df:
            df[col] = 0
    # Per-row gaps created by the outer merges count as zero traffic.
    df.fillna(0, inplace=True)
    df["total_voice_trafic"] = df["2g_voice_trafic"] + df["3g_voice_trafic"]
    df["total_data_trafic"] = (
        df["2g_data_trafic"] + df["3g_data_trafic"] + df["lte_data_trafic"]
    )
    df = pd.merge(df, physical_db, on=["code"], how="left")
    # Assign period based on date range
    pre_start, pre_end = pd.to_datetime(pre_range[0]), pd.to_datetime(pre_range[1])
    post_start, post_end = pd.to_datetime(post_range[0]), pd.to_datetime(post_range[1])
    last_period_start, last_period_end = pd.to_datetime(
        last_period_range[0]
    ), pd.to_datetime(last_period_range[1])
    # Slice used by the display section (maps / top-N charts).
    last_period = df[
        (df["date"] >= last_period_start) & (df["date"] <= last_period_end)
    ]
    def assign_period(date):
        # Inclusive on both ends; dates outside both windows are "other".
        if pre_start <= date <= pre_end:
            return "pre"
        elif post_start <= date <= post_end:
            return "post"
        else:
            return "other"
    df["period"] = df["date"].apply(assign_period)
    comparison = df[df["period"].isin(["pre", "post"])]
    # One row per site code, one column per (metric, period) pair.
    pivot = (
        comparison.groupby(["code", "period"])[
            ["total_voice_trafic", "total_data_trafic"]
        ]
        .sum()
        .unstack()
    )
    pivot.columns = [f"{metric}_{period}" for metric, period in pivot.columns]
    pivot = pivot.reset_index()
    # Differences
    # NOTE(review): these direct lookups KeyError if one period has no
    # rows at all, while the pct loop below uses .get defaults — confirm
    # the inconsistency is intentional.
    pivot["total_voice_trafic_diff"] = (
        pivot["total_voice_trafic_post"] - pivot["total_voice_trafic_pre"]
    )
    pivot["total_data_trafic_diff"] = (
        pivot["total_data_trafic_post"] - pivot["total_data_trafic_pre"]
    )
    for metric in ["total_voice_trafic", "total_data_trafic"]:
        # .get(...) only substitutes when the column is missing entirely;
        # NOTE(review): a site with zero pre-traffic still divides by 0
        # and yields inf here — confirm that is acceptable downstream.
        pivot[f"{metric}_diff_pct"] = (
            (pivot.get(f"{metric}_post", 0) - pivot.get(f"{metric}_pre", 0))
            / pivot.get(f"{metric}_pre", 1)
        ) * 100
    return df, last_period, pivot.round(2)
############################## UI #########################
# --- Page header and expected input formats --------------------------------
st.title("📊 Global Trafic Analysis - 2G / 3G / LTE")
doc_col, image_col = st.columns(2)
with doc_col:
    st.write(
        """
The report analyzes 2G / 3G / LTE traffic :
- 2G Traffic Report in CSV format (required columns : BCF name, PERIOD_START_TIME, TRAFFIC_PS DL, PS_UL_Load)
- 3G Traffic Report in CSV format (required columns : WBTS name, PERIOD_START_TIME, Total CS traffic - Erl, Total_Data_Traffic)
- LTE Traffic Report in CSV format (required columns : LNBTS name, PERIOD_START_TIME, 4G/LTE DL Traffic Volume (GBytes), 4G/LTE UL Traffic Volume (GBytes))
"""
    )
# with image_col:
#     st.image("./assets/trafic_analysis.png", width=250)
# --- One uploader per technology; all three are required downstream --------
upload_2g_col, upload_3g_col, upload_lte_col = st.columns(3)
with upload_2g_col:
    two_g_file = st.file_uploader(
        "Upload 2G Traffic Report", type=["csv", "xls", "xlsx"]
    )
with upload_3g_col:
    three_g_file = st.file_uploader(
        "Upload 3G Traffic Report", type=["csv", "xls", "xlsx"]
    )
with upload_lte_col:
    lte_file = st.file_uploader(
        "Upload LTE Traffic Report", type=["csv", "xls", "xlsx"]
    )
# --- Comparison windows and top-N selector ---------------------------------
# Each date_input returns a (possibly incomplete) sequence of picked dates.
pre_range_col, post_range_col = st.columns(2)
with pre_range_col:
    pre_range = st.date_input("Pre-period (from - to)", [])
with post_range_col:
    post_range = st.date_input("Post-period (from - to)", [])
last_period_range_col, number_of_top_trafic_sites_col = st.columns(2)
with last_period_range_col:
    last_period_range = st.date_input("Last period (from - to)", [])
with number_of_top_trafic_sites_col:
    number_of_top_trafic_sites = st.number_input(
        "Number of top traffic sites", value=25
    )
# Guard clauses: stop the script until the user has supplied everything.
# The date pickers return fewer than 2 dates while a range is half-picked,
# and merge_and_compare indexes [0]/[1] of all THREE ranges — the original
# code forgot to validate last_period_range and crashed with IndexError.
if (
    len(pre_range) != 2
    or len(post_range) != 2
    or len(last_period_range) != 2
):
    st.warning("⚠️ Please select 2 dates for each period (pre, post and last period).")
    st.stop()
if not all([two_g_file, three_g_file, lte_file]):
    st.info("Please upload all 3 reports and select the comparison periods.")
    st.stop()
def _read_report(uploaded_file) -> pd.DataFrame:
    """Read an uploaded traffic report into a DataFrame.

    The uploaders accept csv/xls/xlsx, but the original code always used
    read_csv with ";" — which cannot parse Excel uploads.  Dispatch on the
    uploaded file's extension instead.
    """
    if uploaded_file.name.lower().endswith((".xls", ".xlsx")):
        return pd.read_excel(uploaded_file)
    return pd.read_csv(uploaded_file, delimiter=";")


if st.button("🔍 Run Analysis"):
    df_2g = _read_report(two_g_file)
    df_3g = _read_report(three_g_file)
    df_lte = _read_report(lte_file)
    df_2g_clean = preprocess_2g(df_2g)
    df_3g_clean = preprocess_3g(df_3g)
    df_lte_clean = preprocess_lte(df_lte)
    full_df, last_period, summary_df = merge_and_compare(
        df_2g_clean, df_3g_clean, df_lte_clean, pre_range, post_range, last_period_range
    )
    # 🔍 Display Summary
    st.success("✅ Analysis completed")
    st.subheader("📈 Summary Analysis Pre / Post")
    st.dataframe(summary_df)
    # Park the last-period slice on the class so the display section
    # below can render it.
    TraficAnalysis.last_period_df = last_period
#######################################################################################################
#######################################################################################################
if TraficAnalysis.last_period_df is not None:
    # ---- Display section: runs only after an analysis was computed ----
    df = TraficAnalysis.last_period_df
    # Get top trafics sites based on total data trafic during last period
    top_sites = (
        df.groupby(["code", "City"])["total_data_trafic"]
        .sum()
        .sort_values(ascending=False)
    )
    top_sites = top_sites.head(number_of_top_trafic_sites)
    st.subheader(f"Top {number_of_top_trafic_sites} sites by data traffic")
    chart_col, data_col = st.columns(2)
    with data_col:
        st.dataframe(top_sites.sort_values(ascending=True))
    # chart
    fig = px.bar(
        top_sites.reset_index(),
        # Bar labels are "City_code" strings built from the two group keys.
        y=top_sites.reset_index()[["City", "code"]].agg(
            lambda x: "_".join(map(str, x)), axis=1
        ),
        x="total_data_trafic",
        title=f"Top {number_of_top_trafic_sites} sites by data traffic",
        orientation="h",
        text="total_data_trafic",
        text_auto=True,
    )
    # fig.update_layout(height=600)
    with chart_col:
        st.plotly_chart(fig)
    # Top sites by voice trafic during last period
    top_sites_voice = (
        df.groupby(["code", "City"])["total_voice_trafic"]
        .sum()
        .sort_values(ascending=False)
    )
    top_sites_voice = top_sites_voice.head(number_of_top_trafic_sites)
    st.subheader(f"Top {number_of_top_trafic_sites} sites by voice traffic")
    chart_col, data_col = st.columns(2)
    with data_col:
        st.dataframe(top_sites_voice.sort_values(ascending=True))
    # chart
    fig = px.bar(
        top_sites_voice.reset_index(),
        y=top_sites_voice.reset_index()[["City", "code"]].agg(
            lambda x: "_".join(map(str, x)), axis=1
        ),
        x="total_voice_trafic",
        title=f"Top {number_of_top_trafic_sites} sites by voice traffic",
        orientation="h",
        text="total_voice_trafic",
        text_auto=True,
    )
    # fig.update_layout(height=600)
    with chart_col:
        st.plotly_chart(fig)
    #####################################################
    # Bubble sizes on both maps are scaled linearly into [min_size, max_size].
    min_size = 5
    max_size = 40
    # Map of sum of data trafic during last period
    # Aggregate total data traffic
    df_data = (
        df.groupby(["code", "City", "Latitude", "Longitude"])["total_data_trafic"]
        .sum()
        .reset_index()
    )
    st.subheader("Map of data trafic during last period")
    # Define size range
    # Linear size scaling
    traffic_data_min = df_data["total_data_trafic"].min()
    traffic_data_max = df_data["total_data_trafic"].max()
    # NOTE(review): divides by (max - min) — breaks if every site has the
    # same total; confirm that case cannot occur with real data.
    df_data["bubble_size"] = df_data["total_data_trafic"].apply(
        lambda x: min_size
        + (max_size - min_size)
        * (x - traffic_data_min)
        / (traffic_data_max - traffic_data_min)
    )
    # Custom blue color scale: start from visible blue
    custom_blue_red = [
        [0.0, "#4292c6"],  # light blue
        [0.2, "#2171b5"],
        [0.4, "#084594"],  # dark blue
        [0.6, "#cb181d"],  # Strong red
        [0.8, "#a50f15"],  # Darker red
        [1.0, "#67000d"],  # Very dark red
    ]
    fig = px.scatter_map(
        df_data,
        lat="Latitude",
        lon="Longitude",
        color="total_data_trafic",
        size="bubble_size",
        color_continuous_scale=custom_blue_red,
        size_max=max_size,
        zoom=10,
        height=600,
        title="Data traffic distribution",
        hover_data={"code": True, "total_data_trafic": True},
        hover_name="code",
        text=[str(x) for x in df_data["code"]],
    )
    # NOTE(review): px.scatter_map is the MapLibre-based API whose layout
    # key is `map_style`; `mapbox_style` here is presumably ignored — verify.
    fig.update_layout(
        mapbox_style="open-street-map",
        coloraxis_colorbar=dict(title="Total Data Traffic (MB)"),
        coloraxis=dict(cmin=traffic_data_min, cmax=traffic_data_max),
        font=dict(size=10, color="black"),
    )
    st.plotly_chart(fig)
    ########################################################################################
    # Map of sum of voice trafic during last period
    # Aggregate total voice traffic
    df_voice = (
        df.groupby(["code", "City", "Latitude", "Longitude"])["total_voice_trafic"]
        .sum()
        .reset_index()
    )
    st.subheader("Map of voice trafic during last period")
    # Linear size scaling
    traffic_voice_min = df_voice["total_voice_trafic"].min()
    traffic_voice_max = df_voice["total_voice_trafic"].max()
    # Same linear scaling (and same max==min caveat) as the data map above.
    df_voice["bubble_size"] = df_voice["total_voice_trafic"].apply(
        lambda x: min_size
        + (max_size - min_size)
        * (x - traffic_voice_min)
        / (traffic_voice_max - traffic_voice_min)
    )
    fig = px.scatter_map(
        df_voice,
        lat="Latitude",
        lon="Longitude",
        color="total_voice_trafic",
        size="bubble_size",
        color_continuous_scale=custom_blue_red,
        size_max=max_size,
        zoom=10,
        height=600,
        title="Voice traffic distribution",
        hover_data={"code": True, "total_voice_trafic": True},
        hover_name="code",
        text=[str(x) for x in df_voice["code"]],
    )
    fig.update_layout(
        mapbox_style="open-street-map",
        coloraxis_colorbar=dict(title="Total Voice Traffic (MB)"),
        coloraxis=dict(cmin=traffic_voice_min, cmax=traffic_voice_max),
        font=dict(size=10, color="black"),
    )
    st.plotly_chart(fig)
    # Build the two-sheet Excel payload for download.
    # NOTE(review): full_df / summary_df only exist when "Run Analysis"
    # was pressed in this same script run — the only case in which
    # last_period_df is set — so this branch appears coherent; confirm.
    final_dfs = convert_dfs(
        [full_df, summary_df], ["Global_Trafic_Analysis", "Pre_Post_analysis"]
    )
    # 📥 Download button
    st.download_button(
        on_click="ignore",
        type="primary",
        label="Download the Analysis Report",
        data=final_dfs,
        file_name=f"Global_Trafic_Analysis_Report_{datetime.now()}.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )