OrgStats

Running

App Files Community

evijit HF Staff committed about 1 month ago

Commit

27c66d1

verified ·

1 Parent(s): 4d0811f

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -4

app.py CHANGED Viewed

@@ -135,7 +135,7 @@ def extract_org_from_id(model_id):
         return model_id.split("/")[0]
     return "unaffiliated"
-def make_treemap_data(df, count_by, top_k=25, tag_filter=None, pipeline_filter=None, size_filter=None):
     """Process DataFrame into treemap format with filters applied"""
     # Create a copy to avoid modifying the original
     filtered_df = df.copy()
@@ -158,6 +158,10 @@ def make_treemap_data(df, count_by, top_k=25, tag_filter=None, pipeline_filter=N
     # Add organization column
     filtered_df["organization"] = filtered_df["id"].apply(extract_org_from_id)
     # Aggregate by organization
     org_totals = filtered_df.groupby("organization")[count_by].sum().reset_index()
     org_totals = org_totals.sort_values(by=count_by, ascending=False)
@@ -215,7 +219,7 @@ def create_treemap(treemap_data, count_by, title=None):
     return fig
-def load_models_csv():
     # Read the CSV file
     df = pd.read_csv('models.csv')
@@ -419,6 +423,12 @@ with gr.Blocks() as demo:
                 step=5,
                 info="Number of top organizations to include"
             )
             generate_plot_button = gr.Button("Generate Plot", variant="primary")
@@ -426,7 +436,7 @@ with gr.Blocks() as demo:
             plot_output = gr.Plot()
             stats_output = gr.Markdown("*Generate a plot to see statistics*")
-    def generate_plot_on_click(count_by, filter_choice, tag_filter, pipeline_filter, size_filter, top_k, data_df):
         print(f"Generating plot with: Metric={count_by}, Filter={filter_choice}, Tag={tag_filter}, Pipeline={pipeline_filter}, Size={size_filter}, Top K={top_k}")
         if data_df is None or len(data_df) == 0:
@@ -444,6 +454,12 @@ with gr.Blocks() as demo:
         if size_filter != "None":
             selected_size_filter = size_filter
         # Process data for treemap
         treemap_data = make_treemap_data(
             df=data_df,
@@ -451,7 +467,8 @@ with gr.Blocks() as demo:
             top_k=top_k,
             tag_filter=selected_tag_filter,
             pipeline_filter=selected_pipeline_filter,
-            size_filter=selected_size_filter
         )
         # Create plot
@@ -484,6 +501,10 @@ with gr.Blocks() as demo:
             for org, value in top_5_orgs.items():
                 percentage = (value / total_value) * 100
                 stats_md += f"\n| {org} | {int(value):,} | {percentage:.2f}% |"
         return fig, stats_md
@@ -518,6 +539,7 @@ with gr.Blocks() as demo:
             pipeline_filter_dropdown,
             size_filter_dropdown,
             top_k_slider,
             models_data
         ],
         outputs=[plot_output, stats_output]

         return model_id.split("/")[0]
     return "unaffiliated"
+def make_treemap_data(df, count_by, top_k=25, tag_filter=None, pipeline_filter=None, size_filter=None, skip_orgs=None):
     """Process DataFrame into treemap format with filters applied"""
     # Create a copy to avoid modifying the original
     filtered_df = df.copy()
     # Add organization column
     filtered_df["organization"] = filtered_df["id"].apply(extract_org_from_id)
+    # Skip organizations if specified
+    if skip_orgs and len(skip_orgs) > 0:
+        filtered_df = filtered_df[~filtered_df["organization"].isin(skip_orgs)]
     # Aggregate by organization
     org_totals = filtered_df.groupby("organization")[count_by].sum().reset_index()
     org_totals = org_totals.sort_values(by=count_by, ascending=False)
     return fig
+def load_models_csv():
     # Read the CSV file
     df = pd.read_csv('models.csv')
                 step=5,
                 info="Number of top organizations to include"
             )
+            skip_orgs_textbox = gr.Textbox(
+                label="Organizations to Skip (comma-separated)",
+                placeholder="e.g., openai, meta, huggingface",
+                info="Enter names of organizations to exclude from the visualization"
+            )
             generate_plot_button = gr.Button("Generate Plot", variant="primary")
             plot_output = gr.Plot()
             stats_output = gr.Markdown("*Generate a plot to see statistics*")
+    def generate_plot_on_click(count_by, filter_choice, tag_filter, pipeline_filter, size_filter, top_k, skip_orgs_text, data_df):
         print(f"Generating plot with: Metric={count_by}, Filter={filter_choice}, Tag={tag_filter}, Pipeline={pipeline_filter}, Size={size_filter}, Top K={top_k}")
         if data_df is None or len(data_df) == 0:
         if size_filter != "None":
             selected_size_filter = size_filter
+        # Process skip organizations list
+        skip_orgs = []
+        if skip_orgs_text and skip_orgs_text.strip():
+            skip_orgs = [org.strip() for org in skip_orgs_text.split(',') if org.strip()]
+            print(f"Skipping organizations: {skip_orgs}")
         # Process data for treemap
         treemap_data = make_treemap_data(
             df=data_df,
             top_k=top_k,
             tag_filter=selected_tag_filter,
             pipeline_filter=selected_pipeline_filter,
+            size_filter=selected_size_filter,
+            skip_orgs=skip_orgs
         )
         # Create plot
             for org, value in top_5_orgs.items():
                 percentage = (value / total_value) * 100
                 stats_md += f"\n| {org} | {int(value):,} | {percentage:.2f}% |"
+            # Add note about skipped organizations if any
+            if skip_orgs:
+                stats_md += f"\n\n*Note: {len(skip_orgs)} organization(s) excluded: {', '.join(skip_orgs)}*"
         return fig, stats_md
             pipeline_filter_dropdown,
             size_filter_dropdown,
             top_k_slider,
+            skip_orgs_textbox,
             models_data
         ],
         outputs=[plot_output, stats_output]