ModelVerse

Running

App Files Community

evijit HF Staff committed on 25 days ago

Commit

d858aa5

verified ·

1 Parent(s): b06975a

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -50

app.py CHANGED Viewed

@@ -4,43 +4,82 @@ import gradio as gr
 import pandas as pd
 import plotly.express as px
 import time
 from datasets import load_dataset
 # --- Constants ---
 PARAM_CHOICES = ['< 1B', '1B', '5B', '12B', '32B', '64B', '128B', '256B', '> 500B']
-PARAM_CHOICES_DEFAULT_INDICES = [0, len(PARAM_CHOICES) - 1]
-# --- NEW: Define choices for the Top-K dropdown ---
 TOP_K_CHOICES = list(range(5, 51, 5))
 HF_DATASET_ID = "evijit/orgstats_daily_data"
 TAG_FILTER_CHOICES = [ "Audio & Speech", "Time series", "Robotics", "Music", "Video", "Images", "Text", "Biomedical", "Sciences" ]
 PIPELINE_TAGS = [ 'text-generation', 'text-to-image', 'text-classification', 'text2text-generation', 'audio-to-audio', 'feature-extraction', 'image-classification', 'translation', 'reinforcement-learning', 'fill-mask', 'text-to-speech', 'automatic-speech-recognition', 'image-text-to-text', 'token-classification', 'sentence-similarity', 'question-answering', 'image-feature-extraction', 'summarization', 'zero-shot-image-classification', 'object-detection', 'image-segmentation', 'image-to-image', 'image-to-text', 'audio-classification', 'visual-question-answering', 'text-to-video', 'zero-shot-classification', 'depth-estimation', 'text-ranking', 'image-to-video', 'multiple-choice', 'unconditional-image-generation', 'video-classification', 'text-to-audio', 'time-series-forecasting', 'any-to-any', 'video-text-to-text', 'table-question-answering' ]
 def load_models_data():
     overall_start_time = time.time()
     print(f"Attempting to load dataset from Hugging Face Hub: {HF_DATASET_ID}")
     try:
         dataset_dict = load_dataset(HF_DATASET_ID)
-        if not dataset_dict: raise ValueError(f"Dataset '{HF_DATASET_ID}' loaded but appears empty.")
-        split_name = list(dataset_dict.keys())[0]
-        df = dataset_dict[split_name].to_pandas()
-        elapsed = time.time() - overall_start_time
         if 'params' in df.columns:
             df['params'] = pd.to_numeric(df['params'], errors='coerce').fillna(0)
         else:
             df['params'] = 0
-            print("CRITICAL WARNING: 'params' column not found in data. Parameter filtering will not work.")
-        msg = f"Successfully loaded dataset '{HF_DATASET_ID}' (split: {split_name}) from HF Hub in {elapsed:.2f}s. Shape: {df.shape}"
         print(msg)
         return df, True, msg
     except Exception as e:
-        err_msg = f"Failed to load dataset '{HF_DATASET_ID}' from Hugging Face Hub. Error: {e}"
         print(err_msg)
         return pd.DataFrame(), False, err_msg
 def get_param_range_values(param_range_labels):
-    if not param_range_labels or len(param_range_labels) != 2: return None, None
     min_label, max_label = param_range_labels
     min_val = 0.0 if '<' in min_label else float(min_label.replace('B', ''))
     max_val = float('inf') if '>' in max_label else float(max_label.replace('B', ''))
@@ -81,11 +120,18 @@ def create_treemap(treemap_data, count_by, title=None):
     fig.update_traces(textinfo="label+value+percent root", hovertemplate="<b>%{label}</b><br>%{value:,} " + count_by + "<br>%{percentRoot:.2%} of total<extra></extra>")
     return fig
-with gr.Blocks(title="ModelVerse Explorer", fill_width=True) as demo:
     models_data_state = gr.State(pd.DataFrame())
     loading_complete_state = gr.State(False)
-    with gr.Row(): gr.Markdown("# 🤗 The Hub Org-Model Atlas")
     with gr.Row():
         with gr.Column(scale=1):
             count_by_dropdown = gr.Dropdown(label="Metric", choices=[("Downloads (last 30 days)", "downloads"), ("Downloads (All Time)", "downloadsAllTime"), ("Likes", "likes")], value="downloads")
@@ -94,22 +140,20 @@ with gr.Blocks(title="ModelVerse Explorer", fill_width=True) as demo:
             pipeline_filter_dropdown = gr.Dropdown(label="Select Pipeline Tag", choices=PIPELINE_TAGS, value=None, visible=False)
             with gr.Group():
-                with gr.Row():
-                    param_label_display = gr.Markdown("<div style='font-weight: 500;'>Parameters</div>")
-                    reset_params_button = gr.Button("🔄 Reset", visible=False, size="sm", min_width=80)
-                param_slider = gr.Slider(
-                    minimum=0, maximum=len(PARAM_CHOICES) - 1, step=1,
-                    value=PARAM_CHOICES_DEFAULT_INDICES,
-                    label="Parameter Range", show_label=False
-                )
-            # --- MODIFIED: Replaced Slider with Dropdown for Top-K selection ---
-            top_k_dropdown = gr.Dropdown(
-                label="Number of Top Organizations",
-                choices=TOP_K_CHOICES,
-                value=25
-            )
             skip_orgs_textbox = gr.Textbox(label="Organizations to Skip (comma-separated)", value="TheBloke,MaziyarPanahi,unsloth,modularai,Gensyn,bartowski")
             generate_plot_button = gr.Button(value="Generate Plot", variant="primary", interactive=False)
@@ -118,21 +162,6 @@ with gr.Blocks(title="ModelVerse Explorer", fill_width=True) as demo:
             status_message_md = gr.Markdown("Initializing...")
             data_info_md = gr.Markdown("")
-    def _update_slider_ui_elements(current_range_indices):
-        if not isinstance(current_range_indices, list) or len(current_range_indices) != 2: return gr.update(), gr.update()
-        min_idx, max_idx = int(current_range_indices[0]), int(current_range_indices[1])
-        min_label, max_label = PARAM_CHOICES[min_idx], PARAM_CHOICES[max_idx]
-        label_md = f"<div style='font-weight: 500;'>Parameters <span style='float: right; font-weight: normal; color: #555;'>{min_label} to {max_label}</span></div>"
-        is_default = (min_idx == 0 and max_idx == len(PARAM_CHOICES) - 1)
-        return label_md, gr.update(visible=not is_default)
-    def _reset_param_slider_and_ui():
-        default_label = "<div style='font-weight: 500;'>Parameters</div>"
-        return gr.update(value=PARAM_CHOICES_DEFAULT_INDICES), default_label, gr.update(visible=False)
-    param_slider.release(fn=_update_slider_ui_elements, inputs=param_slider, outputs=[param_label_display, reset_params_button])
-    reset_params_button.click(fn=_reset_param_slider_and_ui, outputs=[param_slider, param_label_display, reset_params_button])
     def _update_button_interactivity(is_loaded_flag): return gr.update(interactive=is_loaded_flag)
     loading_complete_state.change(fn=_update_button_interactivity, inputs=loading_complete_state, outputs=generate_plot_button)
@@ -156,21 +185,23 @@ with gr.Blocks(title="ModelVerse Explorer", fill_width=True) as demo:
                 data_info_text = f"### Data Load Failed\n- {status_msg_from_load}"
                 status_msg_ui = status_msg_from_load
         except Exception as e:
-            status_msg_ui = f"An unexpected error occurred during data loading: {str(e)}"
             data_info_text = f"### Critical Error\n- {status_msg_ui}"
             load_success_flag = False
             print(f"Critical error in ui_load_data_controller: {e}")
         return current_df, load_success_flag, data_info_text, status_msg_ui
     def ui_generate_plot_controller(metric_choice, filter_type, tag_choice, pipeline_choice,
-                                   param_range_indices, k_orgs, skip_orgs_input, df_current_models, progress=gr.Progress()):
         if df_current_models is None or df_current_models.empty:
             return create_treemap(pd.DataFrame(), metric_choice, "Error: Model Data Not Loaded"), "Model data is not loaded."
         progress(0.1, desc="Preparing data...")
         tag_to_use = tag_choice if filter_type == "Tag Filter" else None
         pipeline_to_use = pipeline_choice if filter_type == "Pipeline Filter" else None
         orgs_to_skip = [org.strip() for org in skip_orgs_input.split(',') if org.strip()]
         min_label = PARAM_CHOICES[int(param_range_indices[0])]
         max_label = PARAM_CHOICES[int(param_range_indices[1])]
         param_labels_for_filtering = [min_label, max_label]
@@ -186,22 +217,22 @@ with gr.Blocks(title="ModelVerse Explorer", fill_width=True) as demo:
             plot_stats_md = "No data matches the selected filters. Please try different options."
         else:
             total_items_in_plot = len(treemap_df['id'].unique())
-            total_value_in_plot = treemap_df[count_by].sum()
             plot_stats_md = f"## Plot Statistics\n- **Models shown**: {total_items_in_plot:,}\n- **Total {metric_choice}**: {int(total_value_in_plot):,}"
         return plotly_fig, plot_stats_md
-    demo.load(fn=ui_load_data_controller, inputs=[], outputs=[models_data_state, loading_complete_state, data_info_md, status_message_md])
-    # --- MODIFIED: The inputs list now uses top_k_dropdown ---
     generate_plot_button.click(
         fn=ui_generate_plot_controller,
         inputs=[count_by_dropdown, filter_choice_radio, tag_filter_dropdown, pipeline_filter_dropdown,
-                param_slider, top_k_dropdown, skip_orgs_textbox, models_data_state],
         outputs=[plot_output, status_message_md]
     )
 if __name__ == "__main__":
-    print(f"Application starting. Data will be loaded from Hugging Face dataset: {HF_DATASET_ID}")
     demo.queue().launch()
 # --- END OF FINAL POLISHED FILE app.py ---

 import pandas as pd
 import plotly.express as px
 import time
+import json
 from datasets import load_dataset
 # --- Constants ---
 PARAM_CHOICES = ['< 1B', '1B', '5B', '12B', '32B', '64B', '128B', '256B', '> 500B']
+PARAM_CHOICES_DEFAULT_INDICES_JSON = json.dumps([0, len(PARAM_CHOICES) - 1])
 TOP_K_CHOICES = list(range(5, 51, 5))
 HF_DATASET_ID = "evijit/orgstats_daily_data"
 TAG_FILTER_CHOICES = [ "Audio & Speech", "Time series", "Robotics", "Music", "Video", "Images", "Text", "Biomedical", "Sciences" ]
 PIPELINE_TAGS = [ 'text-generation', 'text-to-image', 'text-classification', 'text2text-generation', 'audio-to-audio', 'feature-extraction', 'image-classification', 'translation', 'reinforcement-learning', 'fill-mask', 'text-to-speech', 'automatic-speech-recognition', 'image-text-to-text', 'token-classification', 'sentence-similarity', 'question-answering', 'image-feature-extraction', 'summarization', 'zero-shot-image-classification', 'object-detection', 'image-segmentation', 'image-to-image', 'image-to-text', 'audio-classification', 'visual-question-answering', 'text-to-video', 'zero-shot-classification', 'depth-estimation', 'text-ranking', 'image-to-video', 'multiple-choice', 'unconditional-image-generation', 'video-classification', 'text-to-audio', 'time-series-forecasting', 'any-to-any', 'video-text-to-text', 'table-question-answering' ]
+# --- Custom HTML, CSS, and JavaScript for the Slider ---
+custom_slider_js = """
+function createCustomSlider() {
+    const paramChoices = [<1B>, <1B>, <5B>, <12B>, <32B>, <64B>, <128B>, <256B>, <500B>];
+    const slider = document.getElementById('noui-slider-container');
+    if (slider.noUiSlider) {
+        slider.noUiSlider.destroy();
+    }
+    noUiSlider.create(slider, {
+        start: [0, paramChoices.length - 1],
+        connect: true,
+        step: 1,
+        range: { 'min': 0, 'max': paramChoices.length - 1 },
+        pips: {
+            mode: 'values',
+            values: Array.from(Array(paramChoices.length).keys()),
+            density: 100 / (paramChoices.length - 1),
+            format: { to: function(value) { return paramChoices[value]; } }
+        }
+    });
+    const paramRangeStateInput = document.querySelector('#param-range-state-js textarea');
+    slider.noUiSlider.on('update', function (values) {
+        const intValues = values.map(v => parseInt(v, 10));
+        const newValue = JSON.stringify(intValues);
+        if (paramRangeStateInput.value !== newValue) {
+            paramRangeStateInput.value = newValue;
+            const event = new Event('input', { bubbles: true });
+            paramRangeStateInput.dispatchEvent(event);
+        }
+    });
+    function highlightPips(values) {
+        const intValues = values.map(v => parseInt(v, 10));
+        document.querySelectorAll('.noUi-value').forEach((pip, index) => {
+            const pipIsSelected = index >= intValues[0] && index <= intValues[1];
+            pip.style.fontWeight = pipIsSelected ? 'bold' : 'normal';
+            pip.style.color = pipIsSelected ? '#000' : '#777';
+        });
+    }
+    slider.noUiSlider.on('update', highlightPips);
+    highlightPips([0, paramChoices.length - 1]);
+}
+"""
 def load_models_data():
     overall_start_time = time.time()
     print(f"Attempting to load dataset from Hugging Face Hub: {HF_DATASET_ID}")
     try:
         dataset_dict = load_dataset(HF_DATASET_ID)
+        df = dataset_dict[list(dataset_dict.keys())[0]].to_pandas()
         if 'params' in df.columns:
             df['params'] = pd.to_numeric(df['params'], errors='coerce').fillna(0)
         else:
             df['params'] = 0
+        msg = f"Successfully loaded dataset in {time.time() - overall_start_time:.2f}s."
         print(msg)
         return df, True, msg
     except Exception as e:
+        err_msg = f"Failed to load dataset. Error: {e}"
         print(err_msg)
         return pd.DataFrame(), False, err_msg
 def get_param_range_values(param_range_labels):
     min_label, max_label = param_range_labels
     min_val = 0.0 if '<' in min_label else float(min_label.replace('B', ''))
     max_val = float('inf') if '>' in max_label else float(max_label.replace('B', ''))
     fig.update_traces(textinfo="label+value+percent root", hovertemplate="<b>%{label}</b><br>%{value:,} " + count_by + "<br>%{percentRoot:.2%} of total<extra></extra>")
     return fig
+custom_head = """
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/noUiSlider/15.7.1/nouislider.min.css">
+<script src="https://cdnjs.cloudflare.com/ajax/libs/noUiSlider/15.7.1/nouislider.min.js"></script>
+"""
+# --- MODIFIED: Added emoji to the browser tab title ---
+with gr.Blocks(title="🤗 ModelVerse Explorer", fill_width=True, head=custom_head) as demo:
     models_data_state = gr.State(pd.DataFrame())
     loading_complete_state = gr.State(False)
+    # --- MODIFIED: Removed the main title from the page body for a cleaner look ---
     with gr.Row():
         with gr.Column(scale=1):
             count_by_dropdown = gr.Dropdown(label="Metric", choices=[("Downloads (last 30 days)", "downloads"), ("Downloads (All Time)", "downloadsAllTime"), ("Likes", "likes")], value="downloads")
             pipeline_filter_dropdown = gr.Dropdown(label="Select Pipeline Tag", choices=PIPELINE_TAGS, value=None, visible=False)
             with gr.Group():
+                gr.Markdown("<div style='font-weight: 500;'>Parameters</div>")
+                gr.HTML("""
+                    <div id="noui-slider-container" style="margin: 2rem 1rem;"></div>
+                    <style>
+                        .noUi-value { font-size: 12px; }
+                        .noUi-pips-horizontal { padding: 10px 0; height: 50px; }
+                        .noUi-connect { background: #333; }
+                        .noUi-handle { border-radius: 50%; width: 20px; height: 20px; right: -10px; top: -7px; box-shadow: none; border: 2px solid #333; background: #FFF; cursor: pointer; }
+                        .noUi-handle:focus { outline: none; }
+                    </style>
+                """)
+                param_range_state_js = gr.Textbox(value=PARAM_CHOICES_DEFAULT_INDICES_JSON, visible=False, elem_id="param-range-state-js")
+            top_k_dropdown = gr.Dropdown(label="Number of Top Organizations", choices=TOP_K_CHOICES, value=25)
             skip_orgs_textbox = gr.Textbox(label="Organizations to Skip (comma-separated)", value="TheBloke,MaziyarPanahi,unsloth,modularai,Gensyn,bartowski")
             generate_plot_button = gr.Button(value="Generate Plot", variant="primary", interactive=False)
             status_message_md = gr.Markdown("Initializing...")
             data_info_md = gr.Markdown("")
     def _update_button_interactivity(is_loaded_flag): return gr.update(interactive=is_loaded_flag)
     loading_complete_state.change(fn=_update_button_interactivity, inputs=loading_complete_state, outputs=generate_plot_button)
                 data_info_text = f"### Data Load Failed\n- {status_msg_from_load}"
                 status_msg_ui = status_msg_from_load
         except Exception as e:
+            status_msg_ui = f"An unexpected error occurred: {str(e)}"
             data_info_text = f"### Critical Error\n- {status_msg_ui}"
             load_success_flag = False
             print(f"Critical error in ui_load_data_controller: {e}")
         return current_df, load_success_flag, data_info_text, status_msg_ui
     def ui_generate_plot_controller(metric_choice, filter_type, tag_choice, pipeline_choice,
+                                   param_range_json, k_orgs, skip_orgs_input, df_current_models, progress=gr.Progress()):
         if df_current_models is None or df_current_models.empty:
             return create_treemap(pd.DataFrame(), metric_choice, "Error: Model Data Not Loaded"), "Model data is not loaded."
         progress(0.1, desc="Preparing data...")
         tag_to_use = tag_choice if filter_type == "Tag Filter" else None
         pipeline_to_use = pipeline_choice if filter_type == "Pipeline Filter" else None
         orgs_to_skip = [org.strip() for org in skip_orgs_input.split(',') if org.strip()]
+        param_range_indices = json.loads(param_range_json)
         min_label = PARAM_CHOICES[int(param_range_indices[0])]
         max_label = PARAM_CHOICES[int(param_range_indices[1])]
         param_labels_for_filtering = [min_label, max_label]
             plot_stats_md = "No data matches the selected filters. Please try different options."
         else:
             total_items_in_plot = len(treemap_df['id'].unique())
+            total_value_in_plot = treemap_df[metric_choice].sum()
             plot_stats_md = f"## Plot Statistics\n- **Models shown**: {total_items_in_plot:,}\n- **Total {metric_choice}**: {int(total_value_in_plot):,}"
         return plotly_fig, plot_stats_md
+    demo.load(fn=ui_load_data_controller, inputs=[], outputs=[models_data_state, loading_complete_state, data_info_md, status_message_md]) \
+       .then(fn=None, _js=custom_slider_js.replace("<", "'<").replace(">", "'"))
     generate_plot_button.click(
         fn=ui_generate_plot_controller,
         inputs=[count_by_dropdown, filter_choice_radio, tag_filter_dropdown, pipeline_filter_dropdown,
+                param_range_state_js, top_k_dropdown, skip_orgs_textbox, models_data_state],
         outputs=[plot_output, status_message_md]
     )
 if __name__ == "__main__":
+    print(f"Application starting...")
     demo.queue().launch()
 # --- END OF FINAL POLISHED FILE app.py ---