engralimalik committed on
Commit
15bb725
·
verified ·
1 Parent(s): 7065c57

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -78
app.py CHANGED
@@ -5,121 +5,98 @@ from folium.plugins import MarkerCluster
5
  import requests
6
  from io import BytesIO
7
  import streamlit as st
8
- import folium
9
- from streamlit.components.v1 import html
10
- import math
11
 
def load_data(url):
    """Download the Excel workbook at *url* and return the merged school data.

    The workbook must contain the sheets ``"lat long"`` and
    ``"measurement data"``; they are inner-joined on ``school_id_giga``.

    Args:
        url: HTTP(S) address of the ``.xlsx`` file.

    Returns:
        A DataFrame with the rows whose ``school_id_giga`` appears in both
        sheets. Column names have surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the response is not labelled as an ``.xlsx`` file.
    """
    xlsx_mime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

    # A timeout keeps the app from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    # .get() avoids a KeyError when the server sends no Content-Type header.
    if xlsx_mime not in response.headers.get('Content-Type', ''):
        raise ValueError("The file is not a valid Excel file.")

    # Passing a list of sheet names parses the workbook once and returns a
    # dict keyed by sheet name.
    sheets = pd.read_excel(
        BytesIO(response.content),
        sheet_name=["lat long", "measurement data"],
        engine='openpyxl',
    )
    lat_long_data = sheets["lat long"]
    measurement_data = sheets["measurement data"]

    # Strip header whitespace *before* merging so that a padded key column
    # (e.g. "school_id_giga ") still matches.
    lat_long_data.columns = lat_long_data.columns.str.strip()
    measurement_data.columns = measurement_data.columns.str.strip()

    # Merge data on school_id_giga
    merged_data = pd.merge(
        lat_long_data,
        measurement_data,
        on="school_id_giga",
        how="inner",
    )
    return merged_data
38
 
def haversine(lat1, lon1, lat2, lon2, radius_km=6371):
    """Return the great-circle distance between two latitude/longitude points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
        radius_km: Sphere radius; defaults to 6371, the Earth radius in
            kilometres used by the original implementation.

    Returns:
        The distance in the same unit as *radius_km* (kilometres by default).
    """
    # Convert latitude and longitude from degrees to radians
    phi1, lam1, phi2, lam2 = (math.radians(v) for v in (lat1, lon1, lat2, lon2))

    # Haversine formula
    a = (
        math.sin((phi2 - phi1) / 2) ** 2
        + math.cos(phi1) * math.cos(phi2) * math.sin((lam2 - lam1) / 2) ** 2
    )
    # Floating-point rounding can push `a` marginally outside [0, 1] for
    # (near-)antipodal points, which would make sqrt/asin raise ValueError.
    a = min(1.0, max(0.0, a))

    return radius_km * (2 * math.asin(math.sqrt(a)))
53
-
# Perform clustering to find data center location
def find_data_center(df, n_clusters=1):
    """Return K-Means cluster centres of the school coordinates.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns.
        n_clusters: Number of candidate data-center locations to compute.

    Returns:
        The fitted model's ``cluster_centers_``: one ``[latitude, longitude]``
        row per cluster.

    Raises:
        ValueError: If fewer than *n_clusters* rows have both coordinates.
    """
    # KMeans rejects NaN input, so rows without a full coordinate pair are
    # left out of the fit instead of aborting the whole app.
    coords = df[["latitude", "longitude"]].dropna()
    if len(coords) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} rows with latitude and longitude, "
            f"found {len(coords)}."
        )

    # n_init is pinned because its default differs between scikit-learn
    # releases; random_state keeps the result reproducible.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(coords)
    return kmeans.cluster_centers_
58
 
def estimate_latency_bandwidth(df, data_center_lat, data_center_lon):
    """Add distance-based latency and bandwidth estimates to *df*.

    Three columns are written to *df* in place:

    * ``distance_to_data_center`` - haversine distance in km (Earth radius
      6371 km) from each row's ``latitude``/``longitude`` to the data center.
    * ``estimated_latency`` - ``max(10, 100 / (distance + 1))``.
    * ``estimated_bandwidth`` - the same expression.

    Both estimates therefore lie between a floor of 10 and a ceiling of 100
    (reached at distance 0).

    NOTE(review): with this formula the latency estimate is *highest* next to
    the data center, which looks inverted for latency - confirm the intended
    model with the author before relying on these numbers.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns (degrees).
        data_center_lat: Data-center latitude in degrees.
        data_center_lon: Data-center longitude in degrees.

    Returns:
        The same DataFrame object, with the three columns added.
    """
    import numpy as np

    # Vectorised haversine: avoids a Python call per row and, unlike
    # DataFrame.apply(axis=1), also works on an empty frame.
    lat = np.radians(df["latitude"].to_numpy(dtype=float))
    lon = np.radians(df["longitude"].to_numpy(dtype=float))
    center_lat = np.radians(float(data_center_lat))
    center_lon = np.radians(float(data_center_lon))

    a = (
        np.sin((center_lat - lat) / 2) ** 2
        + np.cos(lat) * np.cos(center_lat) * np.sin((center_lon - lon) / 2) ** 2
    )
    # Clip guards against rounding pushing `a` just outside asin's domain.
    distance_km = 6371 * (2 * np.arcsin(np.sqrt(np.clip(a, 0.0, 1.0))))
    df["distance_to_data_center"] = distance_km

    # fmax ignores NaN, matching the builtin max(10, nan) == 10 that the
    # row-wise version produced for rows with missing coordinates.
    estimate = np.fmax(10, 100 / (distance_km + 1))
    df["estimated_latency"] = estimate
    df["estimated_bandwidth"] = estimate

    return df
73
-
# Create a map and plot the points
def plot_map(df, center):
    """Build a folium map of the schools and the proposed data center.

    Args:
        df: DataFrame with ``latitude``, ``longitude``, ``latency``,
            ``download_speed``, ``estimated_latency`` and
            ``estimated_bandwidth`` columns; ``school_name`` is optional.
        center: Sequence whose first element is ``[latitude, longitude]`` of
            the data center (only ``center[0]`` is used).

    Returns:
        A ``folium.Map`` centred on the data center, with one clustered blue
        marker per school and a red marker for the data center.
    """
    from html import escape

    center_lat, center_lon = center[0][0], center[0][1]

    # Named school_map rather than `map` so the builtin is not shadowed.
    school_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)
    marker_cluster = MarkerCluster().add_to(school_map)

    # Add school locations to the map
    for _, row in df.iterrows():
        # folium rejects NaN coordinates, so such rows cannot be drawn.
        if row[["latitude", "longitude"]].isna().any():
            continue

        # The popup is rendered as HTML; escape the free-text name so
        # characters such as "<" or "&" from the spreadsheet cannot break it.
        school_name = escape(str(row.get("school_name", "No Name Provided")))

        # Popup text showing original and estimated latency / bandwidth
        popup_text = (
            f"School Name: {school_name}<br>"
            f"Original Latency: {row['latency']} ms<br>"
            f"Original Bandwidth: {row['download_speed']} Mbps<br>"
            f"Estimated Latency After Data Center: {row['estimated_latency']} ms<br>"
            f"Estimated Bandwidth After Data Center: {row['estimated_bandwidth']} Mbps"
        )

        folium.Marker(
            location=[row["latitude"], row["longitude"]],
            popup=popup_text,
            icon=folium.Icon(color="blue", icon="info-sign"),
        ).add_to(marker_cluster)

    # Add data center location to the map
    folium.Marker(
        location=[center_lat, center_lon],
        popup="Proposed Data Center",
        icon=folium.Icon(color="red", icon="cloud"),
    ).add_to(school_map)

    return school_map
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Main function to run the application
def main():
    """Load the school data, estimate metrics and render the Streamlit page.

    The folium map is embedded first, followed by the page title and a short
    explanatory paragraph.
    """
    url = "https://huggingface.co/spaces/engralimalik/lace/resolve/main/data%20barbados.xlsx"  # Correct raw file URL
    schools = load_data(url)

    center = find_data_center(schools)
    center_lat, center_lon = center[0][0], center[0][1]
    # Estimate latency and bandwidth based on distance
    schools = estimate_latency_bandwidth(schools, center_lat, center_lon)

    school_map = plot_map(schools, center)

    # Embed the map directly in the Streamlit app as rendered HTML
    html(school_map._repr_html_(), width=700, height=500)

    st.title("Impact of Data Center on Latency and Bandwidth")
    st.write("This map shows school locations and proposed data center locations based on clustering. The latency and bandwidth values represent the potential improvements for schools closer to the data center.")


if __name__ == "__main__":
    main()
 
5
  import requests
6
  from io import BytesIO
7
  import streamlit as st
 
 
 
8
 
# Load data from Excel
def load_data(url):
    """Download the Excel workbook at *url* and return the merged school data.

    The workbook must contain the sheets ``"lat long"`` and
    ``"measurement data"``; they are inner-joined on ``school_id_giga``.

    Args:
        url: HTTP(S) address of the ``.xlsx`` file.

    Returns:
        A DataFrame with the rows whose ``school_id_giga`` appears in both
        sheets.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Fetch the file from the URL; the timeout keeps the app from hanging
    # forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()

    # Read the Excel file from the response content. Passing a list of sheet
    # names parses the workbook once and returns a dict keyed by sheet name.
    sheets = pd.read_excel(
        BytesIO(response.content),
        sheet_name=["lat long", "measurement data"],
        engine='openpyxl',
    )

    # Merge data on school_id_giga
    merged_data = pd.merge(
        sheets["lat long"],
        sheets["measurement data"],
        on="school_id_giga",
        how="inner",
    )
    return merged_data
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Perform clustering to find data center location
def find_data_center(df, n_clusters=1):
    """Return K-Means cluster centres of the school coordinates.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns.
        n_clusters: Number of candidate data-center locations to compute.

    Returns:
        The fitted model's ``cluster_centers_``: one ``[latitude, longitude]``
        row per cluster.

    Raises:
        ValueError: If fewer than *n_clusters* rows have both coordinates.
    """
    # KMeans rejects NaN input, so rows without a full coordinate pair are
    # left out of the fit instead of aborting the whole app.
    coords = df[["latitude", "longitude"]].dropna()
    if len(coords) < n_clusters:
        raise ValueError(
            f"Need at least {n_clusters} rows with latitude and longitude, "
            f"found {len(coords)}."
        )

    # n_init is pinned because its default differs between scikit-learn
    # releases; random_state keeps the result reproducible.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(coords)
    return kmeans.cluster_centers_
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Create a map and plot the points
def plot_map(df, center):
    """Build a folium map of the schools and the proposed data center.

    Args:
        df: DataFrame with ``latitude``, ``longitude``, ``school_name``,
            ``download_speed``, ``upload_speed`` and ``latency`` columns.
        center: Sequence whose first element is ``[latitude, longitude]`` of
            the data center (only ``center[0]`` is used).

    Returns:
        A ``folium.Map`` centred on the data center, with one clustered blue
        marker per school and a red marker for the data center.
    """
    # NOTE(review): this commit drops the module-level `import folium`;
    # importing here keeps the function usable regardless of what the
    # remaining top-of-file imports provide.
    import folium
    from html import escape

    center_lat, center_lon = center[0][0], center[0][1]

    # Named school_map rather than `map` so the builtin is not shadowed.
    school_map = folium.Map(location=[center_lat, center_lon], zoom_start=10)
    marker_cluster = MarkerCluster().add_to(school_map)

    # Add school locations
    for _, row in df.iterrows():
        # folium rejects NaN coordinates, so such rows cannot be drawn.
        if row[["latitude", "longitude"]].isna().any():
            continue

        # The popup is rendered as HTML; escape the free-text name so
        # characters such as "<" or "&" from the spreadsheet cannot break it.
        school_name = escape(str(row['school_name']))

        folium.Marker(
            location=[row["latitude"], row["longitude"]],
            popup=(
                f"School Name: {school_name}<br>"
                f"Download Speed: {row['download_speed']} Mbps<br>"
                f"Upload Speed: {row['upload_speed']} Mbps<br>"
                f"Latency: {row['latency']} ms"
            ),
            icon=folium.Icon(color="blue", icon="info-sign"),
        ).add_to(marker_cluster)

    # Add data center
    folium.Marker(
        location=[center_lat, center_lon],
        popup="Proposed Data Center",
        icon=folium.Icon(color="red", icon="cloud"),
    ).add_to(school_map)

    return school_map
51
 
# Calculate before and after data center metrics
def calculate_metrics(df, center):
    """Compare average speeds and latency of far-away vs. nearby schools.

    Schools are split at the median distance to the data center. The half
    strictly farther than the median is reported as "before", and the half at
    or below the median as "after". This is a proxy comparison between two
    groups of schools, not a measurement taken before and after a deployment.

    The distance is the great-circle (haversine) distance in kilometres
    (Earth radius 6371 km). Plain Euclidean distance on raw degrees would
    over-weight east-west offsets, because a degree of longitude is shorter
    than a degree of latitude everywhere except on the equator.

    Side effect: a ``distance_to_center`` column (km) is written to *df*.

    Args:
        df: DataFrame with ``latitude``, ``longitude``, ``download_speed``,
            ``upload_speed`` and ``latency`` columns.
        center: Sequence whose first element is ``[latitude, longitude]`` of
            the data center (only ``center[0]`` is used).

    Returns:
        ``(before_bandwidth, before_latency, after_bandwidth, after_latency)``
        where the bandwidth items are Series indexed by ``download_speed`` and
        ``upload_speed`` and the latency items are scalars. Means are NaN for
        an empty group.
    """
    import numpy as np

    # Calculate distances to the data center for each school
    lat = np.radians(df['latitude'].astype(float))
    lon = np.radians(df['longitude'].astype(float))
    center_lat = np.radians(float(center[0][0]))
    center_lon = np.radians(float(center[0][1]))

    a = (
        np.sin((lat - center_lat) / 2) ** 2
        + np.cos(lat) * np.cos(center_lat) * np.sin((lon - center_lon) / 2) ** 2
    )
    # Clip guards against rounding pushing `a` just outside asin's domain.
    df['distance_to_center'] = 6371.0 * 2 * np.arcsin(np.sqrt(a.clip(0.0, 1.0)))

    # Compute the split threshold once and reuse it for both halves.
    median_distance = df['distance_to_center'].median()
    before_data = df[df['distance_to_center'] > median_distance]
    after_data = df[df['distance_to_center'] <= median_distance]

    # Calculate average bandwidth and latency
    speed_columns = ['download_speed', 'upload_speed']
    before_bandwidth = before_data[speed_columns].mean()
    before_latency = before_data['latency'].mean()

    after_bandwidth = after_data[speed_columns].mean()
    after_latency = after_data['latency'].mean()

    return before_bandwidth, before_latency, after_bandwidth, after_latency
69
+
# Main function to run the application
def main():
    """Load the school data, draw the map and show the summary metrics.

    The folium map is saved to ``index.html`` in the working directory and
    embedded in the Streamlit page. Average download speed, upload speed and
    latency are then listed for the "before" and "after" groups returned by
    ``calculate_metrics``.
    """
    # Import the components submodule explicitly rather than relying on
    # `st.components.v1` already being loaded as an attribute of `st`.
    import streamlit.components.v1 as components

    url = "https://huggingface.co/spaces/engralimalik/lace/resolve/main/data%20barbados.xlsx"
    df = load_data(url)

    # Find the center for the data center location
    center = find_data_center(df)

    # Plot the map (named school_map so the builtin `map` is not shadowed)
    school_map = plot_map(df, center)
    school_map.save("index.html")

    # Calculate before and after metrics
    before_bandwidth, before_latency, after_bandwidth, after_latency = calculate_metrics(df, center)

    # Display the map in Streamlit
    st.title("Impact of Data Center Addition")
    st.markdown("Here’s the map of the schools and the proposed data center location.")
    # The context manager closes the file; folium writes UTF-8, so read it
    # back as UTF-8 instead of the platform default encoding.
    with open("index.html", "r", encoding="utf-8") as map_file:
        components.html(map_file.read(), height=500)

    # Display before and after metrics
    st.subheader("Before Data Center Addition")
    st.write(f"Average Download Speed: {before_bandwidth['download_speed']} Mbps")
    st.write(f"Average Upload Speed: {before_bandwidth['upload_speed']} Mbps")
    st.write(f"Average Latency: {before_latency} ms")

    st.subheader("After Data Center Addition")
    st.write(f"Average Download Speed: {after_bandwidth['download_speed']} Mbps")
    st.write(f"Average Upload Speed: {after_bandwidth['upload_speed']} Mbps")
    st.write(f"Average Latency: {after_latency} ms")


if __name__ == "__main__":
    main()