Spaces:
Running
Running
from dash import html, dcc, callback, Input, Output, State | |
import dash_mantine_components as dmc | |
import plotly.express as px | |
import pandas as pd | |
import glob | |
import json | |
import plotly.graph_objects as go | |
from plotly.subplots import make_subplots | |
import plotly.figure_factory as ff | |
from collections import Counter | |
import numpy as np | |
import re | |
from wordcloud import WordCloud | |
import networkx as nx | |
import matplotlib.pyplot as plt | |
import base64 | |
from io import BytesIO | |
import textwrap | |
def readExcel(data):
    """Load an Excel source into a DataFrame.

    ``data`` is forwarded unchanged to ``pandas.read_excel``; the resulting
    DataFrame is returned as-is.
    """
    frame = pd.read_excel(data)
    return frame
def datatomultiplerowswithoutcomment(df, column_name, sep=', '):
    """Explode a multi-valued text column into one row per value.

    Each cell of ``column_name`` is split on ``sep``; every resulting item
    gets its own row (the other columns are repeated). Anything following
    the first ``' ('`` in an item is dropped, so ``'Durable (comment)'``
    becomes ``'Durable'``.

    Args:
        df: Input DataFrame; it is not modified.
        column_name: Name of the string column to split.
        sep: Literal separator between values inside a cell.

    Returns:
        A new DataFrame with one row per non-empty value. Rows whose cell
        was missing are dropped instead of being turned into the literal
        string ``'nan'``. The original index labels are kept, so they may
        repeat.
    """
    df_rows = df.copy()
    # regex=False: treat the separator literally whatever its length.
    df_rows[column_name] = df_rows[column_name].str.split(sep, regex=False)
    df_rows = df_rows.explode(column_name)

    values = df_rows[column_name]
    keep = values.notna() & (values != '')
    # Explicit copy so the assignment below never writes into a view.
    df_rows = df_rows[keep].copy()

    # Keep only the text before the first " (" (strips trailing comments).
    df_rows[column_name] = df_rows[column_name].map(
        lambda value: str(value).split(' (')[0]
    )
    return df_rows
def barplotmonovariablecount(df, column_name, title):
    """Build a horizontal bar chart counting rows per value of one column.

    Args:
        df: DataFrame holding the observations.
        column_name: Column whose distinct values become the bars.
        title: Chart title.

    Returns:
        A Plotly figure with one bar per distinct value, sorted by
        ascending count. Axis labels longer than 100 characters are
        truncated and suffixed with ``"..."``; shorter labels are shown
        unchanged. The hover text always shows the full label.
    """
    max_label_length = 100

    counts = (
        df.groupby(column_name)
        .size()
        .reset_index(name='obs')
        .sort_values(by=['obs'])
    )
    # str() so non-text categories do not break the concatenation/slicing.
    full_labels = [str(label) for label in counts[column_name]]

    fig = px.bar(
        counts,
        x='obs',
        y=column_name,
        width=800,
        height=400,
        orientation='h',
        color='obs',
        template="plotly_dark",
        title=title,
        labels={'obs': 'nombre'},
        color_continuous_scale="Teal",
        text_auto=True,
    )
    fig.update_layout(
        paper_bgcolor="#060621",
        font=dict(size=10, color="#ffffff"),
        autosize=True,
        coloraxis_showscale=False,
    )
    fig.update_traces(
        # One hover template per bar so the untruncated label is shown.
        hovertemplate=[label + ' <br>Nombre : %{x}' for label in full_labels],
        # Only add the ellipsis when something was actually cut off.
        y=[
            label if len(label) <= max_label_length
            else label[:max_label_length] + "..."
            for label in full_labels
        ],
        showlegend=False,
    )
    return fig
def barplotbivariablecount(df, column_name1, column_name2, title):
    """Build a horizontal bar chart of row counts for two crossed columns.

    Rows are counted per (``column_name1``, ``column_name2``) pair.
    ``column_name1`` is placed on the y axis and ``column_name2`` selects
    the bar colour; ``title`` is the chart title.
    """
    pair_counts = df.groupby([column_name1, column_name2]).size()
    pair_counts = pair_counts.reset_index(name='obs')
    pair_counts = pair_counts.sort_values(by=['obs'])

    figure = px.bar(
        pair_counts,
        y=column_name1,
        x='obs',
        orientation='h',
        width=800,
        height=400,
        color=column_name2,
        template="plotly_dark",
        title=title,
        labels={'obs': 'nombre'},
        color_discrete_sequence=px.colors.qualitative.Safe,
        text_auto=True,
    )
    figure.update_layout(
        font=dict(size=10, color="#ffffff"),
        paper_bgcolor="#060621",
        autosize=True,
        coloraxis_showscale=False,
    )
    return figure
def multiwordcloud(df):
    """Render one word-cloud image per ODD11 theme.

    For each theme a matplotlib figure is created; inside it, one word
    cloud is drawn per diploma type from the text of the
    "Référence et intitulé de l'UE" column of the matching rows.

    Args:
        df: DataFrame with the columns 'Thématiques ODD11', 'Diplôme' and
            "Référence et intitulé de l'UE".

    Returns:
        A list of four ``data:image/png;base64,...`` URIs, one per theme,
        in the order Durable, Résilient, Sûr, Inclusive.
    """
    exclure_mots = ['ue', 'précisez', 'd', 'du', 'de', 'la', 'las', 'des', 'le', 'et', 'est', 'elle', 'une', 'en', 'que', 'aux', 'qui', 'ces', 'les', 'dans', 'sur', 'l', 'un', 'pour', 'par', 'il', 'ou', 'à', 'ce', 'a', 'sont', 'cas', 'plus', 'leur', 'se', 's', 'vous', 'au', 'c', 'aussi', 'toutes', 'autre', 'comme']
    thematiques = ['Durable', 'Résilient', 'Sûr', 'Inclusive']
    diplomes = ['BUT', 'Licence', 'Licence professionnelle', 'Master']
    # Compiled once; str.replace() cannot take a pattern, re.sub() is needed.
    punctuation = re.compile(r'[-./?!,":;()\']')

    figures_list = []
    for figure_number, thematique in enumerate(thematiques, start=1):
        fig = plt.figure(figure_number, figsize=(10, 12), facecolor="#060621")
        try:
            # NOTE(review): the source indentation was lost; the slot counter
            # is assumed to advance only when a cloud is actually drawn.
            count = 1
            for diplome in diplomes:
                selection = df[(df['Thématiques ODD11'] == thematique) & (df['Diplôme'] == diplome)]
                # Missing titles are skipped so the join cannot fail on NaN.
                titles = selection["Référence et intitulé de l'UE"].dropna().astype(str).tolist()
                words = ". ".join(titles).lower()
                words = punctuation.sub(' ', words)
                if not words:
                    continue

                wordcloud = WordCloud(background_color='#ffffff', stopwords=exclure_mots, max_words=100).generate(words)
                # Slots 1-2 use a 1x2 grid, slots 3-4 a 2x2 grid (kept as in
                # the original so the page layout is unchanged).
                if count <= 2:
                    axes = plt.subplot(1, 2, count)
                else:
                    axes = plt.subplot(2, 2, count)
                axes.imshow(wordcloud, interpolation="bilinear")
                axes.axis('off')
                axes.set_title(thematique + ' - ' + diplome, fontdict={'fontsize': 'medium', 'color': '#ffffff'})
                count += 1

            # Serialise the figure to an in-memory PNG and embed it as a data URI.
            buf = BytesIO()
            fig.savefig(buf, format="png")
            fig_data = base64.b64encode(buf.getbuffer()).decode("ascii")
            figures_list.append(f'data:image/png;base64,{fig_data}')
        finally:
            # pyplot keeps numbered figures alive; without closing, the next
            # call would reuse this figure together with its old axes.
            plt.close(fig)
    return figures_list
def matrixlist(df):
    """Build a formation x theme presence matrix.

    Args:
        df: DataFrame with an "Intitulé" column (formation name) and a
            'Thématiques ODD11' column (one theme per row).

    Returns:
        A DataFrame indexed by the distinct, non-missing formation names in
        first-seen order, with the columns 'Durable', 'Inclusive', 'Sûr' and
        'Résilient'. A cell is 1 when at least one row of ``df`` pairs that
        formation with that theme, otherwise 0.
    """
    thematiques = ['Durable', 'Inclusive', 'Sûr', 'Résilient']

    # dict.fromkeys de-duplicates while keeping a stable, first-seen order.
    formations = list(dict.fromkeys(
        name for name in df["Intitulé"].tolist() if not pd.isna(name)
    ))
    matrix = pd.DataFrame(0, index=formations, columns=thematiques)

    for thematique in thematiques:
        # Only the formation column is inspected, so a formation name that
        # happens to appear in another column cannot produce a false match.
        concerned = set(
            df.loc[df['Thématiques ODD11'] == thematique, "Intitulé"].tolist()
        )
        for formation in formations:
            if formation in concerned:
                matrix.loc[formation, thematique] = 1
    return matrix
def matrixcorrelation(matrix, df):
    """Draw the formation x ODD11-theme matrix as a Plotly heatmap.

    Args:
        matrix: DataFrame whose index holds formation names and whose
            columns hold theme names; cells equal to 1 get a hover card.
        df: Source DataFrame with the columns 'Thématiques ODD11',
            "Référence et intitulé de l'UE", 'Pratiques pédagogiques' and
            'Intitulé', used for the hover details and for the figure height.

    Returns:
        A ``go.Figure`` containing a single heatmap trace. For each cell
        equal to 1 the hover card shows the formation, the theme and the
        first matching row's UE reference and teaching practices; other
        cells have an empty hover text.
    """
    # NOTE(review): the height is driven by the number of rows in ``df``,
    # not by the number of rows in ``matrix`` - kept as in the original.
    formation_rows = df["Intitulé"].values.tolist()

    fig = go.Figure(data=go.Heatmap(
        z=matrix.values,
        x=matrix.columns,
        y=matrix.index,
        # Stepped colour scale. NOTE(review): the scale ends on green at 1.0
        # rather than on the gold stop declared at 0.8 - kept unchanged.
        colorscale=[
            [0, 'rgba(6,6,33,1)'],
            [0.2, 'rgba(6,6,33,1)'],
            [0.2, '#FF69B4'],  # pink
            [0.4, '#FF69B4'],
            [0.4, '#4169E1'],  # blue
            [0.6, '#4169E1'],
            [0.6, '#32CD32'],  # green
            [0.8, '#32CD32'],
            [0.8, '#FFD700'],  # gold
            [1.0, '#32CD32']
        ],
        showscale=False,
    ))

    # Thin gaps between cells act as cell borders.
    fig.update_traces(
        xgap=1,
        ygap=1,
    )

    # Global layout.
    fig.update_layout(
        title='Matrice des thématiques ODD11<br>par formation',
        xaxis=dict(
            side='top',
            tickangle=45,
            tickfont=dict(size=10),
        ),
        yaxis=dict(
            autorange='reversed',
            tickfont=dict(size=10),
        ),
        width=1200,
        height=len(formation_rows) * 20,
        template='plotly_dark',
        paper_bgcolor='rgba(6,6,33,1)',
        plot_bgcolor='rgba(6,6,33,1)',
        margin=dict(
            t=100,
            l=300,
            r=100,
            b=50
        ),
        hovermode="x unified",
        hoverlabel=dict(
            bgcolor='rgba(8,8,74,1)',
            font_size=10,
        )
    )

    # Axis grid styling.
    fig.update_xaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgrey',
    )
    fig.update_yaxes(
        showgrid=True,
        gridwidth=1,
        gridcolor='lightgrey',
    )

    # Custom hover text: one entry per heatmap cell, same shape as ``matrix``.
    df_info = df[["Thématiques ODD11", "Référence et intitulé de l'UE", "Pratiques pédagogiques", "Intitulé"]]
    hover_text = []
    for idx in matrix.index:
        row = []
        for col in matrix.columns:
            cell = ''
            if matrix.loc[idx, col] == 1:
                # A boolean mask always yields a DataFrame, even when a theme
                # has a single row (label-based .loc would return a Series).
                matches = df_info[
                    (df_info["Thématiques ODD11"] == col)
                    & (df_info["Intitulé"] == idx)
                ]
                # Guard against a flagged cell with no backing row.
                if not matches.empty:
                    first = matches.iloc[0]
                    ue = first["Référence et intitulé de l'UE"]
                    pedagogie = first["Pratiques pédagogiques"]
                    # str() so missing/non-text values do not break wrapping.
                    formation_html = "<br>".join(textwrap.wrap(str(idx), width=70))
                    ue_html = "<br>".join(textwrap.wrap(str(ue), width=80))
                    cell = (
                        f"<b>💼 Formation: {formation_html}</b><br>"
                        + f"<b>📣 Thématique ODD11: {col.capitalize()}</b><br><br>"
                        + f"💡 Référence et intitulé de l'UE : {ue_html}<br><br>"
                        + "📚 Pratiques pédagogiques: <br>" + str(pedagogie) + "<br><br>"
                    )
            row.append(cell)
        hover_text.append(row)

    fig.update_traces(
        hovertemplate="%{customdata}<extra></extra>",
        customdata=hover_text,
    )
    return fig
def create_analysis_page(title, label, data):
    """Build the Dash layout of an analysis page.

    Args:
        title: Heading displayed at the top of the page.
        label: Analysis identifier. Only "Analyse ODD 11 formation" has an
            implementation; any other value produces placeholder content.
        data: Excel source passed to ``readExcel``. It is only read when
            ``label`` selects an implemented analysis.

    Returns:
        A ``dmc.Container`` holding the title, three Plotly graphs and four
        word-cloud images.
    """
    missing_data_message = "Aucun fichier de données fourni pour cette analyse."

    def placeholder_figure():
        """Return an empty Plotly figure carrying the 'no data' message."""
        figure = go.Figure()
        figure.add_annotation(text=missing_data_message, xref="paper", yref="paper", showarrow=False, font=dict(size=20))
        return figure

    if label == "Analyse ODD 11 formation":
        # The file is read only here so that pages without a data file can
        # still render their placeholders.
        df = readExcel(data)
        df_figure = datatomultiplerowswithoutcomment(df, 'Thématiques ODD11', sep=', ')
        fig1 = barplotmonovariablecount(df_figure, 'Thématiques ODD11', "Répartition des thématiques ODD11")
        fig2 = barplotbivariablecount(df_figure, 'Thématiques ODD11', 'Diplôme', "Répartition des thématiques ODD11 par type diplôme")
        fig3 = multiwordcloud(df_figure)
        fig4 = matrixcorrelation(matrixlist(df_figure), df_figure)
    else:
        fig1 = placeholder_figure()
        fig2 = placeholder_figure()
        fig4 = placeholder_figure()
        # Blank PNG reused for the four image slots. The layout indexes
        # fig3[0..3], so it must be a list and not a single string.
        blank = plt.Figure()
        buf = BytesIO()
        blank.savefig(buf, format="png")
        fig_data = base64.b64encode(buf.getbuffer()).decode("ascii")
        fig3 = [f'data:image/png;base64,{fig_data}'] * 4

    # Negative top margins pull the images up over the blank area that the
    # generated figures leave at the top (first image needs a larger shift).
    image_margins = ['-300px', '-200px', '-200px', '-200px']
    wordcloud_images = [
        dcc.Loading(
            html.Img(
                src=fig3[position],
                style={'width': '100%', 'height': 'auto', 'padding': '0px', 'margin-top': margin},
            ),
        )
        for position, margin in enumerate(image_margins)
    ]

    return dmc.Container(
        [
            dmc.Title(title, order=2, mb="lg"),
            dmc.Grid(
                [
                    dmc.GridCol(dcc.Loading(dcc.Graph(figure=fig1)), span=12),
                    dmc.GridCol(dcc.Loading(dcc.Graph(figure=fig2)), span=12),
                    dmc.GridCol(dcc.Loading(dcc.Graph(figure=fig4)), span=12),
                    dmc.GridCol(wordcloud_images, span=12),
                ]
            ),
        ],
        fluid=True,
        p="lg",
        pt="xl"
    )