Spaces:

GIZ
/

Development-Project-Synergy-Finder

Sleeping

App Files Files Community

Jan Mühlnikel committed on Mar 18, 2024

Commit

55a6bd8

1 Parent(s): f123b98

added crs5 and sdg selection

Browse files

Files changed (8) hide show

__pycache__/similarity_page.cpython-310.pyc +0 -0
functions/__pycache__/calc_matches.cpython-310.pyc +0 -0
functions/__pycache__/filter_projects.cpython-310.pyc +0 -0
functions/calc_matches.py +1 -1
functions/filter_projects.py +13 -4
modules/__pycache__/result_table.cpython-310.pyc +0 -0
modules/result_table.py +12 -2
similarity_page.py +38 -1

__pycache__/similarity_page.cpython-310.pyc CHANGED Viewed

Binary files a/__pycache__/similarity_page.cpython-310.pyc and b/__pycache__/similarity_page.cpython-310.pyc differ

functions/__pycache__/calc_matches.cpython-310.pyc CHANGED Viewed

Binary files a/functions/__pycache__/calc_matches.cpython-310.pyc and b/functions/__pycache__/calc_matches.cpython-310.pyc differ

functions/__pycache__/filter_projects.cpython-310.pyc CHANGED Viewed

Binary files a/functions/__pycache__/filter_projects.cpython-310.pyc and b/functions/__pycache__/filter_projects.cpython-310.pyc differ

functions/calc_matches.py CHANGED Viewed

@@ -14,7 +14,7 @@ def calc_matches(filtered_df, project_df, similarity_matrix):
     match_matrix = similarity_matrix[filtered_df_indecies_list]
     # get row (project1) and column (project2) with highest similarity in filtered df
-    top_indices = np.unravel_index(np.argsort(match_matrix, axis=None)[-60:], match_matrix.shape)
     # get the corresponding similarity values
     top_values = match_matrix[top_indices]

     match_matrix = similarity_matrix[filtered_df_indecies_list]
     # get row (project1) and column (project2) with highest similarity in filtered df
+    top_indices = np.unravel_index(np.argsort(match_matrix, axis=None)[-30:], match_matrix.shape)
     # get the corresponding similarity values
     top_values = match_matrix[top_indices]

functions/filter_projects.py CHANGED Viewed

@@ -1,13 +1,22 @@
 import pandas as pd
 def contains_code(crs_codes, code_list):
     codes = str(crs_codes).split(';')
     return any(code in code_list for code in codes)
-def filter_projects(df, crs3_list):
-    filtered_crs_df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]
-    return filtered_crs_df

 import pandas as pd
 def contains_code(crs_codes, code_list):
     """Tell whether any code of a ';'-separated field is in *code_list*.

     Args:
         crs_codes: Cell value holding one or more codes joined by ';'.
             It is converted with ``str()`` first, so non-string values
             are compared through their string form.
         code_list: Collection of code strings to look for.

     Returns:
         True if at least one of the split codes is contained in
         *code_list*, otherwise False.
     """
     for candidate in str(crs_codes).split(';'):
         if candidate in code_list:
             return True
     return False
+def filter_projects(df, crs3_list, crs5_list, sdg_str):
+    if crs3_list != [] or crs5_list != [] or sdg_str != "":
+        if crs3_list and not crs5_list:
+            df = df[df['crs_3_code'].apply(lambda x: contains_code(x, crs3_list))]
+        elif crs3_list and crs5_list:
+            df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
+        elif not crs3_list and crs5_list:
+            df = df[df['crs_5_code'].apply(lambda x: contains_code(x, crs5_list))]
+        if sdg_str != "":
+            df = df[df["sgd_pred_code"] == int(sdg_str)]
+        return df

modules/__pycache__/result_table.cpython-310.pyc CHANGED Viewed

Binary files a/modules/__pycache__/result_table.cpython-310.pyc and b/modules/__pycache__/result_table.cpython-310.pyc differ

modules/result_table.py CHANGED Viewed

@@ -7,10 +7,15 @@ def show_table(p1_df, p2_df):
         st.write("------------------")
         st.dataframe(
-            p1_df[["title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
             use_container_width = True,
             height = 35 + 35 * len(p1_df),
             column_config={
                 "orga_abbreviation": st.column_config.TextColumn(
                     "Organization",
                     help="If description not in English, description in other language provided",
@@ -59,10 +64,15 @@ def show_table(p1_df, p2_df):
         st.write("------------------")
         st.dataframe(
-            p2_df[["title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
             use_container_width = True,
             height = 35 + 35 * len(p2_df),
             column_config={
                 "orga_abbreviation": st.column_config.TextColumn(
                     "Organization",
                     help="If description not in English, description in other language provided",

         st.write("------------------")
         st.dataframe(
+            p1_df[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
             use_container_width = True,
             height = 35 + 35 * len(p1_df),
             column_config={
+                "iati_id": st.column_config.TextColumn(
+                    "IATI ID",
+                    help="IATI Project ID",
+                    disabled=True
+                ),
                 "orga_abbreviation": st.column_config.TextColumn(
                     "Organization",
                     help="If description not in English, description in other language provided",
         st.write("------------------")
         st.dataframe(
+            p2_df[["iati_id", "title_main", "orga_abbreviation", "client", "description_main", "country", "sgd_pred_code", "crs_3_code", "crs_5_code", "similarity"]],
             use_container_width = True,
             height = 35 + 35 * len(p2_df),
             column_config={
+                "iati_id": st.column_config.TextColumn(
+                    "IATI ID",
+                    help="IATI Project ID",
+                    disabled=True
+                ),
                 "orga_abbreviation": st.column_config.TextColumn(
                     "Organization",
                     help="If description not in English, description in other language provided",

similarity_page.py CHANGED Viewed

@@ -17,6 +17,7 @@ from functions.filter_projects import filter_projects
 from functions.calc_matches import calc_matches
 import psutil
 import os
 def get_process_memory():
     process = psutil.Process(os.getpid())
@@ -116,6 +117,7 @@ def show_page():
     st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
     st.write("Similarities")
     col1, col2 = st.columns([1, 1])
     with col1:
         # CRS 3 SELECTION
@@ -124,6 +126,31 @@ def show_page():
                         CRS3_MERGED,
                         placeholder="Select"
                         )
     with col2:
         st.write("x")
@@ -131,12 +158,22 @@ def show_page():
     # CRS CODE LIST
     crs3_list = [i[-3:] for i in crs3_option]
     # FILTER DF WITH SELECTED FILTER OPTIONS
-    filtered_df = filter_projects(projects_df, crs3_list)
     # FIND MATCHES
     p1_df, p2_df = calc_matches(filtered_df, projects_df, sim_matrix)
     # SHOW THE RESULT
     show_table(p1_df, p2_df)

 from functions.calc_matches import calc_matches
 import psutil
 import os
+import gc
 def get_process_memory():
     process = psutil.Process(os.getpid())
     st.write(f"Current RAM usage of this app: {get_process_memory():.2f} MB")
     st.write("Similarities")
+    st.session_state.crs5_option_disabled = True
     col1, col2 = st.columns([1, 1])
     with col1:
         # CRS 3 SELECTION
                         CRS3_MERGED,
                         placeholder="Select"
                         )
+        # CRS 5 SELECTION
+        ## Only enable crs5 select field when crs3 code is selected
+        if crs3_option != []:
+            st.session_state.crs5_option_disabled = False
+        ## define list of crs5 codes dependent on crs3 codes
+        crs5_list = [txt[0].replace('"', "") for crs3_item in crs3_option for code, txt in CRS5_MERGED.items() if str(code)[:3] == str(crs3_item)[-3:]]
+        ## crs5 select field
+        crs5_option = st.multiselect(
+            'CRS 5',
+            crs5_list,
+            placeholder="Select",
+            disabled=st.session_state.crs5_option_disabled
+            )
+        # SDG SELECTION
+        sdg_option = st.selectbox(
+                label = 'SDG',
+                index = None,
+                placeholder = "Select SDG",
+                options = SDG_NAMES[:-1],
+                )
     with col2:
         st.write("x")
     # CRS CODE LIST
     crs3_list = [i[-3:] for i in crs3_option]
+    crs5_list = [i[-5:] for i in crs5_option]
+    # SDG CODE LIST
+    if sdg_option != None:
+        sdg_str = sdg_option[0]
+    else:
+        sdg_str = ""
     # FILTER DF WITH SELECTED FILTER OPTIONS
+    filtered_df = filter_projects(projects_df, crs3_list, crs5_list, sdg_str)
     # FIND MATCHES
     p1_df, p2_df = calc_matches(filtered_df, projects_df, sim_matrix)
     # SHOW THE RESULT
     show_table(p1_df, p2_df)
+    del p1_df, p2_df, crs3_list, crs5_list, sdg_str, filtered_df
+    gc.collect()