Spaces:
Sleeping
Sleeping
''' | |
Module for creating a Bokeh Layout object to be embedded in HTML templates using Flask. | |
This module: | |
- Loads data from the specified CSV files | |
- Prepares and processes the data | |
- Creates Bokeh Figure and DataTable objects | |
- Builds a Bokeh Layout combining these elements | |
Dependencies: | |
- CSV files located in the "data" folder | |
''' | |
import ast | |
from bokeh.layouts import column, row | |
from bokeh.models import CDSView, ColumnDataSource, CustomJS, DataTable, DatetimeTickFormatter | |
from bokeh.models import HTMLTemplateFormatter, HoverTool, IndexFilter, TableColumn, TapTool | |
from bokeh.plotting import figure | |
from bokeh.transform import linear_cmap | |
import math | |
import pandas as pd | |
import xyzservices.providers as xyz | |
PARKING_HISTORY_CSV_FILEPATH = "./data/parking_occupancy_history.csv" | |
GENERAL_INFO_CSV_FILEPATH = "./data/parking_general_information.csv" | |
PARKING_ID_HOMEPAGE = 'LPA0740' | |
DATA_TABLE_COLUMNS_FILTER = [ | |
"parking", | |
"heure", | |
"nombre_de_places_disponibles", | |
"capacité_total", | |
"nombre de niveaux", | |
"hauteur limite (mètre)", | |
"téléphone", | |
"tarifs", | |
"adresse" | |
] | |
LATITUDE_LYON = 45.764043 | |
LONGITUDE_LYON = 4.835659 | |
CIRCLE_SIZE_BOUNDS = (10, 25) | |
ZOOM_LEVEL = 10000 | |
def get_address(string_dict): | |
""" | |
Extract an address from a string representing a dictionary. | |
Parameters: | |
- string_dict (str): A string containing address information. | |
Returns: | |
- str: A formatted address string (street, postal code, locality). | |
""" | |
address_keys = ["schema:streetAddress", "schema:postalCode", "schema:addressLocality"] | |
string_dict = string_dict.strip('"').replace('"', "'") | |
string_dict = string_dict.replace("': ", '": "').replace(", '", '", "').replace("'\"", '"' ).replace("\"'", '"' ).replace("{'", '{"').replace("'}", '"}') | |
address_dict = ast.literal_eval(string_dict) | |
address = [str(address_dict.get(key)) for key in address_keys] | |
adress_string = " ".join(address) | |
return adress_string | |
def get_parking_capacity(capacity_str): | |
""" | |
Parse and retrieve the 'mv:maximumValue' from a string representing a list of dictionaries. | |
The input string contains data in a JSON-like format, and this function extracts the | |
'mv:maximumValue' from the last dictionary in the list. | |
Parameters: | |
- capacity_str (str): A string representation of a list of dictionaries. | |
Returns: | |
- int or None: The value of the 'mv:maximumValue' key, or None if the key is not present. | |
Raises: | |
- ValueError: If the input string cannot be evaluated as a valid list of dictionaries. | |
""" | |
str_clean = capacity_str.replace("'", '"') | |
str_clean = str_clean.replace(": ,", ': None,') | |
data_list = eval(str_clean) | |
last_dict = data_list[-1] | |
return last_dict.get("mv:maximumValue") | |
def clean_phone_number(phone_number): | |
""" | |
Format a phone number by ensuring it starts with '0' and adding spaces every 2 digits. | |
Parameters: | |
- phone_number (int or str): The input phone number. | |
Returns: | |
- str: A formatted phone number (e.g., "01 23 45 67 89"). | |
""" | |
if not pd.isna(phone_number): | |
phone_number = "0" + str(int(phone_number)) | |
phone_number_slices_list = [phone_number[i: i+2] for i in range(0, 10, 2)] | |
phone_number = " ".join(phone_number_slices_list) | |
return phone_number | |
def latlon_to_webmercator(lat, lon): | |
""" | |
Convert latitude and longitude to Web Mercator coordinates. | |
Parameters: | |
- lat (float): Latitude in degrees | |
- lon (float): Longitude in degrees | |
Returns: | |
- (float, float): Web Mercator x, y coordinates | |
""" | |
R = 6378137 # Radius of the Earth in meters (WGS 84 standard) | |
x = R * math.radians(lon) # Convert longitude to radians and scale | |
y = R * math.log(math.tan(math.pi / 4 + math.radians(lat) / 2)) # Transform latitude | |
return x, y | |
def normalize_number(nb, data_range, expected_range): | |
""" | |
Normalize a number to fit within a target range while preserving its relative position. | |
This function maps a given input value (`nb`) from an original data range (`data_range`) | |
to a new expected target range (`expected_range`). The input number is scaled such that | |
its relative position in `data_range` is maintained in `expected_range`. | |
Parameters: | |
- nb (float): The input number to be normalized. | |
- data_range (tuple of float): A tuple containing two floats representing the input's original range (min, max). | |
- data_range[0] (float): The lower bound of the input's original range. | |
- data_range[1] (float): The upper bound of the input's original range. | |
- expected_range (tuple of float): A tuple containing two floats representing the desired target range (min, max). | |
- expected_range[0] (float): The lower bound of the desired target range. | |
- expected_range[1] (float): The upper bound of the desired target range. | |
Returns: | |
- float: The normalized value scaled to fit within the `expected_range`. | |
""" | |
result = nb | |
if (data_range[1] - data_range[0]) != 0: | |
result = expected_range[0] + (nb - data_range[0]) / (data_range[1] - data_range[0]) * (expected_range[1] - expected_range[0]) | |
return result | |
def prepare_general_info_dataframe(csv_filepath): | |
""" | |
Preprocess parking data from a CSV file. | |
Reads the file at `csv_filepath`, cleans and formats the data, | |
including address, phone number, capacity, coordinates (in lat/lon and Web Mercator), | |
and fills missing values. Renames columns for clarity. | |
Parameters: | |
- csv_filepath (str): Path to the CSV file with parking information. | |
Returns: | |
- pd.DataFrame: A cleaned DataFrame with standardized columns for further processing. | |
""" | |
df_general_info = pd.read_csv(csv_filepath, sep=";") | |
df_general_info['adresse'] = df_general_info['address'].apply(get_address) | |
df_general_info['capacité_total'] = df_general_info['capacity'].apply(get_parking_capacity) | |
df_general_info['téléphone'] = df_general_info['telephone'].apply(clean_phone_number) | |
df_general_info['lat'] = df_general_info['lat'].astype(str).str.replace(',', '.').astype(float) | |
df_general_info['lon'] = df_general_info['lon'].astype(str).str.replace(',', '.').astype(float) | |
df_general_info[["lon_mercator", "lat_mercator"]] = df_general_info.apply( | |
lambda row: pd.Series(latlon_to_webmercator(row["lat"], row["lon"])), | |
axis=1 | |
) | |
df_general_info["resumetarifshoraires"] = df_general_info["resumetarifshoraires"].fillna(" ") | |
df_general_info.rename( | |
columns={ | |
"name": "parking", | |
"url": "site_web", | |
"numberoflevels": "nombre de niveaux", | |
"vehicleheightlimitinm": "hauteur limite (mètre)", | |
"resumetarifshoraires": "tarifs", | |
}, | |
inplace=True | |
) | |
return df_general_info | |
def prepare_global_dataframe(df_general_info, df_parking_history): | |
""" | |
Merges general parking information with parking_history data. | |
Combines data from `df_general_info` and `df_parking_history` into a single DataFrame, | |
enriching historical data with additional details like parking address, capacity, | |
and coordinates. Formats columns, renames for clarity, and sorts by date. | |
Parameters: | |
- df_general_info (pd.DataFrame): DataFrame containing general parking information. | |
- df_parking_history (pd.DataFrame): DataFrame containing historical parking data. | |
Returns: | |
- pd.DataFrame: A merged and formatted DataFrame for further analysis or visualization. | |
""" | |
df_global = pd.merge( | |
left=df_parking_history, | |
right=df_general_info[['identifier', | |
'parking', | |
'site_web', | |
'adresse', | |
'nombre de niveaux', | |
'hauteur limite (mètre)', | |
'téléphone', | |
'tarifs', | |
'lon_mercator', | |
'lat_mercator', | |
'capacité_total']], | |
how='left', left_on='parking_id', | |
right_on='identifier' | |
) | |
df_global['heure'] = df_global['date'].apply(lambda x: x.strftime('%d %B %Y %H:%M:%S')) | |
df_global.rename( | |
columns={ | |
"nb_of_available_parking_spaces": "nombre_de_places_disponibles", | |
}, | |
inplace=True | |
) | |
df_global.sort_values('date', inplace=True) | |
return df_global | |
def prepare_sources(df_global, initial_parking_id=PARKING_ID_HOMEPAGE, data_table_columns_filter=DATA_TABLE_COLUMNS_FILTER): | |
""" | |
Prepares data sources for visualizations. | |
Generates ColumnDataSource objects for the global data, line plot, map, | |
and a transposed table based on the most recent values and selected parking. | |
Parameters: | |
- df_global (pd.DataFrame): The merged global DataFrame with parking data. | |
- initial_parking_id (int): ID of the parking lot to initialize plots. | |
- data_table_columns_filter (list): List of columns to include in the table. | |
Returns: | |
- tuple: Sources for global data, line plot, map, and transposed table. | |
""" | |
df_more_recent_value = df_global.groupby('parking_id').agg({'date': 'max'}) | |
df_map = df_global.merge(df_more_recent_value , on=['parking_id', 'date']) | |
df_line_plot = df_global[df_global['parking_id']==initial_parking_id] | |
df_table = df_map[df_map['parking_id']==initial_parking_id] | |
transposed_data = { | |
"Field": data_table_columns_filter, | |
"Value": [df_table.iloc[0][col] for col in data_table_columns_filter] | |
} | |
source_original = ColumnDataSource(df_global) | |
source_line_plot = ColumnDataSource(df_line_plot) | |
source_map = ColumnDataSource(df_map) | |
source_table = ColumnDataSource(transposed_data) | |
return source_original, source_line_plot, source_map, source_table | |
def add_circle_size_to_source_map(source_map, circle_size_bounds=CIRCLE_SIZE_BOUNDS): | |
""" | |
Adds normalized circle sizes to the source map based on available spaces. | |
Parameters: | |
- source_map (ColumnDataSource): Map data source with parking availability. | |
- circle_size_bounds (tuple): Min and max bounds for circle sizes. | |
Returns: | |
- None: Updates `source_map` in place with a `normalized_circle_size` field. | |
""" | |
available_spaces_range = ( | |
min(source_map.data["nombre_de_places_disponibles"]), | |
max(source_map.data["nombre_de_places_disponibles"]) | |
) | |
normalized_circle_sizes = [normalize_number(x, available_spaces_range, circle_size_bounds) | |
for x in source_map.data["nombre_de_places_disponibles"]] | |
source_map.data['normalized_circle_size'] = normalized_circle_sizes | |
def generate_map_plot(source_map, lyon_x, lyon_y, zoom_level=ZOOM_LEVEL): | |
""" | |
Creates an interactive map plot with parking data. | |
Parameters: | |
- source_map (ColumnDataSource): Data source for map visualization. | |
- lyon_x, lyon_y (float): Mercator coordinates for map center. | |
- zoom_level (float): Zoom level for the map. | |
Returns: | |
- Figure: Bokeh map plot with hover and selection tools. | |
""" | |
color_mapper = linear_cmap(field_name="nombre_de_places_disponibles", | |
palette="Viridis256", | |
low=min(source_map.data["nombre_de_places_disponibles"]), | |
high=max(source_map.data["nombre_de_places_disponibles"])) | |
hover_map = HoverTool( | |
tooltips = [ | |
('nom', '@parking'), | |
('places disponibles', "@nombre_de_places_disponibles"), | |
('capacité', '@{capacité_total}'), | |
], | |
) | |
tap_tool = TapTool() | |
p_map = figure( | |
x_range=(lyon_x - zoom_level, lyon_x + zoom_level), | |
y_range=(lyon_y - zoom_level, lyon_y + zoom_level), | |
x_axis_type="mercator", | |
y_axis_type="mercator", | |
tools=[hover_map, 'pan', 'wheel_zoom'], | |
) | |
p_map.add_tools(tap_tool) | |
p_map.toolbar.active_tap = tap_tool | |
p_map.add_tile(xyz.OpenStreetMap.Mapnik) | |
circle_renderer = p_map.scatter( | |
x="lon_mercator", | |
y="lat_mercator", | |
source=source_map, | |
size="normalized_circle_size", | |
fill_color=color_mapper, | |
fill_alpha=1, | |
) | |
circle_renderer.nonselection_glyph = None | |
circle_renderer.selection_glyph = None | |
return p_map | |
def generate_line_plot(source_line_plot): | |
""" | |
Creates a line plot to show the history of available parking spaces. | |
Parameters: | |
- source_line_plot (ColumnDataSource): Data source for the line plot. | |
Returns: | |
- Figure: Bokeh line plot with hover and zoom tools. | |
""" | |
hover_line = HoverTool( | |
tooltips = [ | |
('Places disponibles', "@nombre_de_places_disponibles"), | |
('Heure', '@date{%a-%H:%M:%S}'), | |
], | |
formatters={'@date': 'datetime'}, | |
) | |
p_line = figure( | |
title=f"Historique des places disponibles - LINE PLOT", | |
height = 400, | |
width = 700, | |
x_axis_type="datetime", | |
x_axis_label="Date", | |
y_axis_label="Nombre de places disponibles", | |
tools=[hover_line, "crosshair", "pan", "wheel_zoom"], | |
align = ('center', 'center') | |
) | |
p_line.line( | |
"date", | |
"nombre_de_places_disponibles", | |
source=source_line_plot, | |
line_width=2, | |
legend_field = "parking", | |
) | |
p_line.legend.location = "top_left" | |
p_line.xaxis.formatter = DatetimeTickFormatter(days="%d/%m/%Y") | |
return p_line | |
def generate_data_table(source_table): | |
""" | |
Creates a data table to display parking information. | |
Parameters: | |
- source_table (ColumnDataSource): Data source for the table. | |
Returns: | |
- DataTable: A Bokeh data table displaying parking details. | |
""" | |
columns_tranposed = [ | |
TableColumn(field="Field", title="Champ"), | |
TableColumn(field="Value", title="Valeur"), | |
] | |
data_table = DataTable( | |
source=source_table, | |
columns=columns_tranposed, | |
editable=True, | |
width=1000, | |
height=200, | |
index_position=None, | |
header_row=False, | |
fit_columns = True, | |
) | |
return data_table | |
def generate_data_table_url(source_line_plot): | |
""" | |
Creates a data table with clickable URLs for parking websites. | |
Parameters: | |
- source_line_plot (ColumnDataSource): Data source for the table. | |
Returns: | |
- DataTable: A Bokeh data table displaying parking website links. | |
""" | |
cds_view = CDSView() | |
cds_view.filter = IndexFilter([0]) | |
column = TableColumn( | |
field="site_web", | |
title="site web", | |
formatter=HTMLTemplateFormatter(template='<a href="<%= site_web %>"><%= site_web %></a>') | |
) | |
data_url = DataTable( | |
source=source_line_plot, | |
columns=[column], | |
editable=True, | |
width=600, | |
height=50, | |
index_position=None, | |
view=cds_view | |
) | |
return data_url | |
def create_selection_callback(source_map, source_line_plot, source_table, source_original, p_line): | |
""" | |
Creates a CustomJS callback for updating data source based on user selection. | |
Parameters: | |
- source_map (ColumnDataSource): The source for the map data. | |
- source_line_plot (ColumnDataSource): The source for the line plot data. | |
- source_table (ColumnDataSource): The source for the data table. | |
- source_original (ColumnDataSource): The source for the original dataset. | |
- p_line (Figure): Bokeh step plot. | |
Returns: | |
- CustomJS: The JavaScript callback. | |
""" | |
callback = CustomJS( | |
args=dict( | |
s_map=source_map, | |
s_line=source_line_plot, | |
s_table=source_table, | |
s_original=source_original, | |
p_line=p_line), | |
code= | |
""" | |
var data_map = s_map.data | |
var data_original = s_original.data | |
var selected_index = cb_obj.indices[0] | |
if (selected_index !== undefined) { | |
var parking_id = data_map['identifier'][selected_index] | |
// Update s_line | |
var line_plot_data = {}; | |
for (var key in data_original) { | |
line_plot_data[key] = []; | |
} | |
for (var i = 0; i < data_original['parking_id'].length; i++) { | |
if (data_original['parking_id'][i] === parking_id) { | |
for (var key in data_original) { | |
line_plot_data[key].push(data_original[key][i]); | |
} | |
} | |
} | |
s_line.data = line_plot_data | |
s_line.change.emit() | |
// Specify new axis range for the history plots | |
var x_min = Math.min(...line_plot_data['date'].map(d => new Date(d).getTime())); | |
var x_max = Math.max(...line_plot_data['date'].map(d => new Date(d).getTime())); | |
var y_min = Math.min(...line_plot_data['nombre_de_places_disponibles']); | |
var y_max = Math.max(...line_plot_data['nombre_de_places_disponibles']); | |
var x_padding = 0.1 * (x_max - x_min); | |
var y_padding = 0.1 * (y_max - y_min); | |
p_line.x_range.setv({ start: x_min - x_padding, end: x_max + x_padding }); | |
p_line.y_range.setv({ start: y_min - y_padding, end: y_max + y_padding }); | |
p_line.change.emit(); | |
// Update s_table | |
var max_date_index = 0 | |
var max_date = new Date(Math.max(...line_plot_data['date'].map(d => new Date(d)))) | |
for (var i = 0; i < line_plot_data['date'].length; i++) { | |
if (new Date(line_plot_data['date'][i]).getTime() === max_date.getTime()) { | |
max_date_index = i; | |
break; | |
} | |
} | |
var filter_columns = ["parking", "heure", "capacité_total", "nombre_de_places_disponibles", "nombre de niveaux", "hauteur limite (mètre)", "téléphone", "tarifs", "adresse"]; | |
var table_data = { | |
"Field": [], | |
"Value": [] | |
}; | |
for (var key of filter_columns) { | |
var value = line_plot_data[key][max_date_index]; | |
table_data["Field"].push(key); | |
table_data["Value"].push(value); | |
} | |
s_table.data = table_data | |
} | |
""" | |
) | |
return callback | |
def main(): | |
""" | |
Main function: | |
- Loads and processes data from CSV files. | |
- Prepares data sources for Bokeh visualizations. | |
- Creates interactive map, line plot, and data table components. | |
- Builds and returns a cohesive Bokeh layout. | |
""" | |
df_general_info = prepare_general_info_dataframe(GENERAL_INFO_CSV_FILEPATH) | |
df_parking_history = pd.read_csv(PARKING_HISTORY_CSV_FILEPATH, index_col='id', parse_dates=[4]) | |
df_global = prepare_global_dataframe(df_general_info, df_parking_history) | |
source_original, source_line_plot, source_map, source_table = prepare_sources(df_global) | |
add_circle_size_to_source_map(source_map, circle_size_bounds=CIRCLE_SIZE_BOUNDS) | |
lyon_x, lyon_y = latlon_to_webmercator(LATITUDE_LYON, LONGITUDE_LYON) | |
p_map = generate_map_plot(source_map, lyon_x, lyon_y, zoom_level=ZOOM_LEVEL) | |
p_line = generate_line_plot(source_line_plot) | |
data_table = generate_data_table(source_table) | |
data_table_url = generate_data_table_url(source_line_plot) | |
callback = create_selection_callback(source_map, source_line_plot, | |
source_table, | |
source_original, | |
p_line) | |
source_map.selected.js_on_change('indices', callback) | |
first_row_layout = row([p_map, p_line]) | |
bokeh_general_layout = column([first_row_layout, data_table, data_table_url]) | |
return bokeh_general_layout | |
bokeh_general_layout = main() | |