engralimalik committed on
Commit
f570531
·
verified ·
1 Parent(s): db1eb60

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -0
app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import folium
3
+ from sklearn.cluster import KMeans
4
+ from folium.plugins import MarkerCluster
5
+ import requests
6
+ from io import BytesIO
7
+ import streamlit as st
8
+ import folium
9
+ from streamlit.components.v1 import html
10
+ import math
11
+
12
+ # Load data from Excel (directly from the URL)
13
def load_data(url, timeout=30):
    """Download an Excel workbook and return its two sheets merged per school.

    The workbook must contain the sheets ``"lat long"`` and
    ``"measurement data"``; both must have a ``school_id_giga`` column,
    which is used as the inner-join key.

    Args:
        url: Address of the ``.xlsx`` file to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the app indefinitely.

    Returns:
        A DataFrame holding the inner join of both sheets on
        ``school_id_giga``, with surrounding whitespace removed from all
        column names.

    Raises:
        ValueError: If the server answers with an error status or the
            response is not served with the Excel (xlsx) MIME type.
        requests.RequestException: On connection problems or timeouts.
    """
    response = requests.get(url, timeout=timeout)

    # Report HTTP failures explicitly; otherwise an HTML error page would
    # only show up as a confusing "not a valid Excel file" message.
    if not response.ok:
        raise ValueError(
            f"Download failed with HTTP status {response.status_code}."
        )

    # A missing Content-Type header is treated like a wrong one instead of
    # escaping as a bare KeyError.
    content_type = response.headers.get("Content-Type", "")
    if "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" not in content_type:
        raise ValueError("The file is not a valid Excel file.")

    # Parse the workbook once; a list of sheet names yields a dict of frames.
    sheets = pd.read_excel(
        BytesIO(response.content),
        sheet_name=["lat long", "measurement data"],
        engine="openpyxl",
    )
    lat_long_data = sheets["lat long"]
    measurement_data = sheets["measurement data"]

    # Strip header whitespace BEFORE merging: a padded key column such as
    # "school_id_giga " would otherwise make the merge itself fail.
    for frame in (lat_long_data, measurement_data):
        frame.columns = frame.columns.str.strip()

    return pd.merge(
        lat_long_data,
        measurement_data,
        on="school_id_giga",
        how="inner",
    )
38
+
39
+ # Haversine formula to calculate distance between two lat/long points
40
def haversine(lat1, lon1, lat2, lon2, radius_km=6371):
    """Return the great-circle distance between two points on a sphere.

    Args:
        lat1: Latitude of the first point, in degrees.
        lon1: Longitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        lon2: Longitude of the second point, in degrees.
        radius_km: Sphere radius; the default is the mean Earth radius in
            kilometers.

    Returns:
        The distance in the same unit as ``radius_km`` (kilometers by
        default). NaN inputs yield NaN.
    """
    lat1, lon1, lat2, lon2 = map(math.radians, (lat1, lon1, lat2, lon2))

    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = (
        math.sin(dlat / 2) ** 2
        + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    )

    # Floating-point rounding can push `a` marginally above 1 for
    # (near-)antipodal points, which would make asin() raise a domain error.
    # A plain comparison (rather than min/max) keeps NaN propagating.
    if a > 1.0:
        a = 1.0

    c = 2 * math.asin(math.sqrt(a))
    return radius_km * c
53
+
54
+ # Perform clustering to find data center location
55
def find_data_center(df, n_clusters=1):
    """Locate candidate data-center sites by k-means clustering of schools.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns.
        n_clusters: Number of sites (cluster centroids) to compute.

    Returns:
        The fitted ``cluster_centers_`` array; each row is one
        ``[latitude, longitude]`` centroid.

    Raises:
        ValueError: If fewer rows with complete coordinates exist than
            clusters were requested.
    """
    # KMeans rejects NaN input, so rows without a usable coordinate pair are
    # left out of the fit instead of failing the whole computation.
    coords = df[["latitude", "longitude"]].dropna()
    if len(coords) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} row(s) with latitude and longitude, "
            f"found {len(coords)}."
        )

    model = KMeans(n_clusters=n_clusters, random_state=0).fit(coords)
    return model.cluster_centers_
58
+
59
+ # Estimate latency and bandwidth based on distance from the data center (inverse relationship)
60
def estimate_latency_bandwidth(df, data_center_lat, data_center_lon):
    """Add distance-based latency and bandwidth estimates to ``df``.

    Three columns are written to ``df`` in place:

    * ``distance_to_data_center`` - haversine distance in kilometers
      (sphere radius 6371 km) from each row's ``latitude``/``longitude``
      to the data center.
    * ``estimated_latency`` - ``max(10, 100 / (distance + 1))``.
    * ``estimated_bandwidth`` - the same expression.

    Both estimates therefore lie in the range 10..100. Rows whose
    coordinates are missing get a NaN distance and the floor value 10 for
    both estimates.

    NOTE(review): under this example scale the schools closest to the data
    center receive the HIGHEST latency figure (100), which looks physically
    inverted. The model is kept as-is here; confirm the intended formula.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns, in degrees.
        data_center_lat: Latitude of the data center, in degrees.
        data_center_lon: Longitude of the data center, in degrees.

    Returns:
        The same ``df`` object, with the three columns added.
    """
    # Local import: this module does not import numpy at file level.
    import numpy as np

    # Vectorized haversine. Unlike a row-wise df.apply(axis=1), this also
    # works for an empty frame (apply returns a DataFrame there, which cannot
    # be assigned to a single column) and avoids a Python call per row.
    lat = np.radians(df["latitude"].to_numpy(dtype=float))
    lon = np.radians(df["longitude"].to_numpy(dtype=float))
    dc_lat = np.radians(float(data_center_lat))
    dc_lon = np.radians(float(data_center_lon))

    a = (
        np.sin((dc_lat - lat) / 2) ** 2
        + np.cos(lat) * np.cos(dc_lat) * np.sin((dc_lon - lon) / 2) ** 2
    )
    # Guard against rounding slightly above 1 before taking arcsin.
    distance_km = 6371 * 2 * np.arcsin(np.sqrt(np.minimum(a, 1.0)))

    # fmax ignores NaN, so a NaN distance falls back to the floor of 10.
    scaled = np.fmax(10.0, 100.0 / (distance_km + 1.0))

    df["distance_to_data_center"] = distance_km
    df["estimated_latency"] = scaled
    df["estimated_bandwidth"] = scaled.copy()

    return df
73
+
74
+ # Create a map and plot the points
75
def plot_map(df, center):
    """Build a folium map of the schools and the proposed data center.

    Args:
        df: DataFrame with ``latitude``, ``longitude``, ``latency``,
            ``download_speed``, ``estimated_latency`` and
            ``estimated_bandwidth`` columns; ``school_name`` is optional.
        center: Sequence of ``[latitude, longitude]`` pairs; only the first
            pair is used, both as the map center and as the data-center
            marker position.

    Returns:
        A ``folium.Map`` with one clustered blue marker per school that has
        coordinates, plus a red marker for the data center.
    """
    # Local import: the module-level name `html` is Streamlit's component
    # helper, not the standard-library module.
    from html import escape

    center_location = [center[0][0], center[0][1]]

    # Not named `map`, to avoid shadowing the builtin.
    school_map = folium.Map(location=center_location, zoom_start=10)
    marker_cluster = MarkerCluster().add_to(school_map)

    for _, row in df.iterrows():
        lat, lon = row["latitude"], row["longitude"]
        # folium rejects NaN locations; skip such rows rather than letting a
        # single incomplete record abort the whole map.
        if pd.isna(lat) or pd.isna(lon):
            continue

        school_name = row.get("school_name", "No Name Provided")

        # Values come straight from a spreadsheet and are rendered as HTML in
        # the popup, so every interpolated value is escaped.
        popup_text = (
            f"School Name: {escape(str(school_name))}<br>"
            f"Original Latency: {escape(str(row['latency']))} ms<br>"
            f"Original Bandwidth: {escape(str(row['download_speed']))} Mbps<br>"
            f"Estimated Latency After Data Center: {escape(str(row['estimated_latency']))} ms<br>"
            f"Estimated Bandwidth After Data Center: {escape(str(row['estimated_bandwidth']))} Mbps"
        )

        folium.Marker(
            location=[lat, lon],
            popup=popup_text,
            icon=folium.Icon(color="blue", icon="info-sign"),
        ).add_to(marker_cluster)

    # The data center is added to the map itself, not to the cluster, so it
    # is never folded into a group of school markers.
    folium.Marker(
        location=center_location,
        popup="Proposed Data Center",
        icon=folium.Icon(color="red", icon="cloud"),
    ).add_to(school_map)

    return school_map
107
+
108
+ # Main function to run the application
109
def main():
    """Run the Streamlit app: load data, place the data center, show the map.

    The page shows the title, then the embedded folium map, then a short
    description. If the workbook cannot be downloaded or validated, an error
    message is shown on the page and nothing else is rendered.
    """
    # Render the title first so it appears above the map rather than below it.
    st.title("Impact of Data Center on Latency and Bandwidth")

    url = "https://huggingface.co/spaces/engralimalik/lace/resolve/main/data%20barbados.xlsx"  # Raw file URL

    # Show download/validation problems as a readable message instead of a
    # raw traceback.
    try:
        df = load_data(url)
    except (requests.RequestException, ValueError) as exc:
        st.error(f"Could not load the school data: {exc}")
        return

    center = find_data_center(df)
    df = estimate_latency_bandwidth(df, center[0][0], center[0][1])

    # Not named `map`, to avoid shadowing the builtin.
    school_map = plot_map(df, center)

    # Embed the folium map in the page as raw HTML.
    map_html = school_map._repr_html_()
    html(map_html, width=700, height=500)

    st.write("This map shows school locations and proposed data center locations based on clustering. The latency and bandwidth values represent the potential improvements for schools closer to the data center.")


if __name__ == "__main__":
    main()