"""Gradio dashboard showing per-model test results on AMD and NVIDIA devices.

The module holds a static sample of test outcomes (``MODELS``), renders one
pie chart per device with matplotlib, and lists the failing tests for each
device in two read-only text boxes.
"""

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

# Use a dark background for every figure. The open-figure warning is silenced
# as well; figures are additionally detached from pyplot once they are built
# (see ``plot_model_stats``), so they should not pile up in the first place.
matplotlib.rcParams['figure.max_open_warning'] = 0
matplotlib.rcParams['figure.facecolor'] = '#000000'
matplotlib.rcParams['axes.facecolor'] = '#000000'
matplotlib.rcParams['savefig.facecolor'] = '#000000'
plt.ioff()  # Non-interactive mode: figures are only rendered on demand

# Sample test results with test names, keyed by model, then device, then outcome
MODELS = {
    "llama": {
        "amd": {
            "passed": [
                "auth_login", "data_validation", "api_response", "file_upload",
                "cache_hit", "user_permissions", "db_query", "session_mgmt",
                "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
                "thread_safety", "backup_restore",
            ],
            "failed": ["network_timeout"],
            "skipped": [
                "gpu_accel", "cuda_ops", "ml_inference", "tensor_ops",
                "distributed", "multi_gpu",
            ],
            "error": [],
        },
        "nvidia": {
            "passed": [
                "auth_login", "data_validation", "api_response", "file_upload",
                "cache_hit", "user_permissions", "db_query", "session_mgmt",
                "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
                "thread_safety", "backup_restore", "gpu_accel", "cuda_ops",
                "ml_inference", "tensor_ops",
            ],
            "failed": ["network_timeout", "distributed"],
            "skipped": ["multi_gpu"],
            "error": [],
        },
    },
    "gemma3": {
        "amd": {
            "passed": [
                "auth_login", "data_validation", "api_response", "file_upload",
                "cache_hit", "user_permissions", "db_query", "session_mgmt",
                "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
                "thread_safety", "backup_restore", "config_load", "log_rotation",
                "health_check", "metrics", "alerts", "monitoring",
                "security_scan", "password_hash", "jwt_token", "oauth_flow",
                "csrf_protect", "xss_filter", "sql_injection", "rate_limiter",
                "load_balance", "circuit_break", "retry_logic", "timeout_handle",
                "graceful_shutdown", "hot_reload", "config_watch", "env_vars",
                "secrets_mgmt", "tls_cert", "encryption", "compression",
                "serialization", "deserialization", "validation",
            ],
            "failed": [
                "gpu_accel", "cuda_ops", "ml_inference", "tensor_ops",
                "distributed", "multi_gpu", "opencl_init", "driver_conflict",
                "memory_bandwidth", "compute_units", "rocm_version",
                "hip_compile", "kernel_launch", "buffer_transfer", "atomic_ops",
                "wavefront_sync",
            ],
            "skipped": [
                "perf_test", "stress_test", "load_test", "endurance",
                "benchmark", "profiling", "memory_leak", "cpu_usage", "disk_io",
                "network_bw", "latency", "throughput",
            ],
            "error": [],
        },
        "nvidia": {
            "passed": [
                "auth_login", "data_validation", "api_response", "file_upload",
                "cache_hit", "user_permissions", "db_query", "session_mgmt",
                "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
                "thread_safety", "backup_restore", "config_load", "log_rotation",
                "health_check", "metrics", "alerts", "monitoring",
                "security_scan", "password_hash", "jwt_token", "oauth_flow",
                "csrf_protect", "xss_filter", "sql_injection", "rate_limiter",
                "load_balance", "circuit_break", "retry_logic", "timeout_handle",
                "graceful_shutdown", "hot_reload", "config_watch", "env_vars",
                "secrets_mgmt", "tls_cert", "encryption", "compression",
                "serialization", "deserialization", "validation", "gpu_accel",
                "cuda_ops", "ml_inference", "tensor_ops",
            ],
            "failed": [
                "distributed", "multi_gpu", "cuda_version", "nvcc_compile",
                "stream_sync", "device_reset", "peer_access", "unified_memory",
                "texture_bind", "surface_write", "constant_mem", "shared_mem",
            ],
            "skipped": [
                "perf_test", "stress_test", "load_test", "endurance",
                "benchmark", "profiling", "memory_leak", "cpu_usage", "disk_io",
                "network_bw",
            ],
            "error": [],
        },
    },
    "csm": {
        "amd": {
            "passed": [],
            "failed": [],
            "skipped": [],
            "error": ["system_crash"],
        },
        "nvidia": {
            "passed": [],
            "failed": [],
            "skipped": [],
            "error": ["system_crash"],
        },
    },
}

# Wedge color per outcome category
_STATUS_COLORS = {
    'passed': '#4CAF50',   # Medium green
    'failed': '#E53E3E',   # More red
    'skipped': '#FFD54F',  # Medium yellow
    'error': '#8B0000',    # Dark red
}


def generate_underlined_line(text: str) -> str:
    """Return *text* on one line followed by a ``─`` underline of equal length.

    Both lines are newline-terminated so further content can be appended
    directly.
    """
    return text + "\n" + "─" * len(text) + "\n"


def _nonzero_counts(results: dict) -> dict:
    """Map each outcome category to its test count, dropping empty categories."""
    return {category: len(tests) for category, tests in results.items() if tests}


def _draw_device_pie(ax, device_label: str, counts: dict) -> None:
    """Draw one device's pie chart (or a placeholder when *counts* is empty)."""
    if not counts:
        ax.text(0.5, 0.5, 'No test results',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=14, color='#888888',
                fontfamily='monospace', weight='normal')
        ax.set_title(device_label, fontsize=28, weight='bold', pad=2,
                     color='#FFFFFF', fontfamily='monospace')
        ax.axis('off')
        return

    total = sum(counts.values())

    def format_count(pct: float) -> str:
        # matplotlib hands back a floating-point percentage; round instead of
        # truncating so e.g. 13.9999 is shown as 14 rather than 13.
        return f'{int(round(float(pct) * total / 100))}'

    # Minimal pie chart - full pie, no donut effect
    _wedges, texts, autotexts = ax.pie(
        list(counts.values()),
        labels=[label.lower() for label in counts],  # Lowercase for minimal look
        colors=[_STATUS_COLORS[category] for category in counts],
        autopct=format_count,
        startangle=90,
        explode=None,  # No separation
        shadow=False,
        wedgeprops=dict(edgecolor='#1a1a1a', linewidth=0.5),  # Minimal borders
        textprops={'fontsize': 12, 'weight': 'normal',
                   'color': '#CCCCCC', 'fontfamily': 'monospace'},
    )

    # Count labels inside the wedges: black and bold for contrast
    for autotext in autotexts:
        autotext.set_color('#000000')
        autotext.set_weight('bold')
        autotext.set_fontsize(14)
        autotext.set_fontfamily('monospace')

    # Minimal category labels
    for text in texts:
        text.set_color('#AAAAAA')
        text.set_weight('normal')
        text.set_fontsize(13)
        text.set_fontfamily('monospace')

    # Device label closer to chart and bigger
    ax.set_title(device_label, fontsize=28, weight='normal', pad=2,
                 color='#FFFFFF', fontfamily='monospace')


def _format_failures(device_label: str, exclusive: set, common: set,
                     has_errors: bool) -> str:
    """Build the failure report text for one device.

    *exclusive* holds tests failing only on this device, *common* those failing
    on both devices. When both are empty the report is a single underlined
    line: "Error(s) detected" if *has_errors*, otherwise "No failures".
    """
    if not exclusive and not common:
        return generate_underlined_line(
            "Error(s) detected" if has_errors else "No failures"
        )

    report = ""
    if exclusive:
        report += generate_underlined_line(f"Failures on {device_label} (exclusive):")
        report += "\n".join(sorted(exclusive))
        if common:
            report += "\n\n"
    if common:
        report += generate_underlined_line(f"Failures on {device_label} (common):")
        report += "\n".join(sorted(common))
    return report


def plot_model_stats(model_name: str) -> tuple[plt.Figure, str, str]:
    """Draw pie charts of a model's passed, failed, skipped, and error stats.

    Args:
        model_name: Key into ``MODELS``.

    Returns:
        A tuple ``(figure, amd_failed_info, nvidia_failed_info)``: the
        matplotlib figure with one pie per device, and the failure report text
        for AMD and NVIDIA respectively. When neither device has any recorded
        result, the figure shows a placeholder and both texts are empty.

    Raises:
        KeyError: If *model_name* is not a key of ``MODELS``.
    """
    model_stats = MODELS[model_name]

    # Per-category counts, without empty categories for cleaner visualization
    amd_counts = _nonzero_counts(model_stats['amd'])
    nvidia_counts = _nonzero_counts(model_stats['nvidia'])

    if not amd_counts and not nvidia_counts:
        # Nothing to plot at all - minimal empty state
        fig, ax = plt.subplots(figsize=(10, 8), facecolor='#000000')
        ax.set_facecolor('#000000')
        ax.text(0.5, 0.5, 'No test results available',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=16, color='#888888',
                fontfamily='monospace', weight='normal')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')
        # Drop pyplot's reference so figures do not accumulate across calls;
        # the Figure object itself stays usable for rendering.
        plt.close(fig)
        return fig, "", ""

    # Two subplots side by side
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9), facecolor='#000000')
    ax1.set_facecolor('#000000')
    ax2.set_facecolor('#000000')

    _draw_device_pie(ax1, "amd", amd_counts)
    _draw_device_pie(ax2, "nvidia", nvidia_counts)

    # Subtle separation line between charts - stops at device labels level
    line_x = 0.5
    fig.add_artist(plt.Line2D([line_x, line_x], [0.0, 0.85],
                              color='#333333', linewidth=1, alpha=0.5,
                              transform=fig.transFigure))

    # Central shared title for model name
    fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
                 color='#CCCCCC', fontfamily='monospace', y=0.95)

    # Clean layout with room for the central title and padding between charts
    fig.tight_layout()
    fig.subplots_adjust(top=0.85, wspace=0.4)

    # Split failures into device-exclusive and common ones
    amd_failed = set(model_stats['amd']['failed'])
    nvidia_failed = set(model_stats['nvidia']['failed'])
    common_failures = amd_failed & nvidia_failed

    amd_failed_info = _format_failures(
        "AMD", amd_failed - nvidia_failed, common_failures,
        bool(model_stats['amd']['error']),
    )
    nvidia_failed_info = _format_failures(
        "NVIDIA", nvidia_failed - amd_failed, common_failures,
        bool(model_stats['nvidia']['error']),
    )

    # Drop pyplot's reference so figures do not accumulate across calls;
    # the Figure object itself stays usable for rendering.
    plt.close(fig)
    return fig, amd_failed_info, nvidia_failed_info


def get_model_stats_summary(model_name: str) -> tuple:
    """Get summary stats for a model (total tests, success rate, status indicator).

    Counts are summed over the AMD and NVIDIA results.

    Returns:
        ``(total, success_rate, status_class)`` where *success_rate* is the
        percentage of passed tests (0 when there are no tests) and
        *status_class* is ``"success-high"`` (>= 80), ``"success-medium"``
        (>= 50) or ``"success-low"``.

    Raises:
        KeyError: If *model_name* is not a key of ``MODELS``.
    """
    stats = MODELS[model_name]
    devices = (stats['amd'], stats['nvidia'])

    total_passed = sum(len(device['passed']) for device in devices)
    total = sum(
        len(device[category])
        for device in devices
        for category in ('passed', 'failed', 'skipped', 'error')
    )
    success_rate = (total_passed / total * 100) if total > 0 else 0

    # Determine status indicator color
    if success_rate >= 80:
        status_class = "success-high"
    elif success_rate >= 50:
        status_class = "success-medium"
    else:
        status_class = "success-low"

    return total, success_rate, status_class


# Custom CSS for dark theme
dark_theme_css = """
/* Global dark theme */
.gradio-container {
    background-color: #000000 !important;
    color: white !important;
}

/* Remove borders from all components */
.gr-box, .gr-form, .gr-panel {
    border: none !important;
    background-color: #000000 !important;
}

/* Sidebar styling */
.sidebar {
    background: linear-gradient(145deg, #111111, #1a1a1a) !important;
    border: none !important;
    padding: 25px !important;
    box-shadow: inset 2px 2px 5px rgba(0, 0, 0, 0.3) !important;
    margin: 0 !important;
    height: 100vh !important;
    position: fixed !important;
    left: 0 !important;
    top: 0 !important;
    width: 300px !important;
}

/* Enhanced model button styling */
.model-button {
    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
    color: white !important;
    border: 2px solid transparent !important;
    margin: 2px 0 !important;
    border-radius: 5px !important;
    padding: 8px 12px !important;
    transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1) !important;
    position: relative !important;
    overflow: hidden !important;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2), inset 0 1px 0 rgba(255, 255, 255, 0.1) !important;
    font-weight: 600 !important;
    font-size: 16px !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
    font-family: monospace !important;
}

.model-button:hover {
    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
    color: #74b9ff !important;
}

.model-button:active {
    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
    color: #5a9bd4 !important;
}

/* Model stats badge */
.model-stats {
    display: flex !important;
    justify-content: space-between !important;
    align-items: center !important;
    margin-top: 8px !important;
    font-size: 12px !important;
    opacity: 0.8 !important;
}

.stats-badge {
    background: rgba(116, 185, 255, 0.2) !important;
    padding: 4px 8px !important;
    border-radius: 10px !important;
    font-weight: 500 !important;
    font-size: 11px !important;
    color: #74b9ff !important;
}

.success-indicator {
    width: 8px !important;
    height: 8px !important;
    border-radius: 50% !important;
    display: inline-block !important;
    margin-right: 6px !important;
}

.success-high {
    background-color: #4CAF50 !important;
}

.success-medium {
    background-color: #FF9800 !important;
}

.success-low {
    background-color: #F44336 !important;
}

/* Regular button styling for non-model buttons */
.gr-button:not(.model-button) {
    background-color: #222222 !important;
    color: white !important;
    border: 1px solid #444444 !important;
    margin: 5px 0 !important;
    border-radius: 8px !important;
    transition: all 0.3s ease !important;
}

.gr-button:not(.model-button):hover {
    background-color: #333333 !important;
    border-color: #666666 !important;
}

/* Plot container with smooth transitions */
.plot-container {
    background-color: #000000 !important;
    border: none !important;
    transition: opacity 0.6s ease-in-out !important;
}

/* Gradio plot component styling */
.gr-plot {
    background-color: #000000 !important;
    transition: opacity 0.6s ease-in-out !important;
}

.gr-plot .gradio-plot {
    background-color: #000000 !important;
    transition: opacity 0.6s ease-in-out !important;
}

.gr-plot img {
    transition: opacity 0.6s ease-in-out !important;
}

/* Target the plot wrapper */
div[data-testid="plot"] {
    background-color: #000000 !important;
}

/* Target all possible plot containers */
.plot-container img, .gr-plot img, .gradio-plot img {
    background-color: #000000 !important;
}

/* Ensure plot area background */
.gr-plot > div, .plot-container > div {
    background-color: #000000 !important;
}

/* Prevent white flash during plot updates */
.plot-container::before {
    content: "";
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background-color: #000000;
    z-index: -1;
}

/* Force all plot elements to have black background */
.plot-container *, .gr-plot *, div[data-testid="plot"] * {
    background-color: #000000 !important;
}

/* Override any white backgrounds in matplotlib */
.plot-container canvas, .gr-plot canvas {
    background-color: #000000 !important;
}

/* Text elements */
h1, h2, h3, p, .markdown {
    color: white !important;
}

/* Sidebar header enhancement */
.sidebar h1 {
    background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    text-align: center !important;
    margin-bottom: 15px !important;
    font-size: 28px !important;
    font-weight: 700 !important;
    font-family: monospace !important;
}

/* Sidebar description text */
.sidebar p {
    text-align: center !important;
    margin-bottom: 20px !important;
    line-height: 1.5 !important;
    font-size: 14px !important;
    font-family: monospace !important;
}

.sidebar strong {
    color: #74b9ff !important;
    font-weight: 600 !important;
    font-family: monospace !important;
}

.sidebar em {
    color: #a29bfe !important;
    font-style: normal !important;
    opacity: 0.9 !important;
    font-family: monospace !important;
}

/* Remove all borders globally */
* {
    border-color: transparent !important;
}

/* Main content area */
.main-content {
    background-color: #000000 !important;
    padding: 20px !important;
    margin-left: 300px !important;
}

/* Failed tests display - seamless appearance with fixed height */
.failed-tests textarea {
    background-color: #000000 !important;
    color: #FFFFFF !important;
    font-family: monospace !important;
    font-size: 14px !important;
    border: none !important;
    padding: 10px !important;
    outline: none !important;
    line-height: 1.4 !important;
    height: 200px !important;
    max-height: 200px !important;
    min-height: 200px !important;
    overflow-y: auto !important;
    resize: none !important;
    scrollbar-width: thin !important;
    scrollbar-color: #333333 #000000 !important;
    scroll-behavior: auto;
    transition: opacity 0.5s ease-in-out !important;
}

/* WebKit scrollbar styling for failed tests */
.failed-tests textarea::-webkit-scrollbar {
    width: 8px !important;
}

.failed-tests textarea::-webkit-scrollbar-track {
    background: #000000 !important;
}

.failed-tests textarea::-webkit-scrollbar-thumb {
    background-color: #333333 !important;
    border-radius: 4px !important;
}

.failed-tests textarea::-webkit-scrollbar-thumb:hover {
    background-color: #555555 !important;
}

/* Prevent white flash in text boxes during updates */
.failed-tests::before {
    content: "";
    position: absolute;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background-color: #000000;
    z-index: -1;
}

.failed-tests {
    background-color: #000000 !important;
    height: 220px !important;
    max-height: 220px !important;
    position: relative;
    transition: opacity 0.5s ease-in-out !important;
}

.failed-tests .gr-textbox {
    background-color: #000000 !important;
    border: none !important;
    height: 200px !important;
    max-height: 200px !important;
    transition: opacity 0.5s ease-in-out !important;
}

/* Force all textbox elements to have black background */
.failed-tests *, .failed-tests .gr-textbox *, .failed-tests textarea * {
    background-color: #000000 !important;
}

/* JavaScript to reset scroll position */
.scroll-reset {
    animation: resetScroll 0.1s ease;
}

@keyframes resetScroll {
    0% { scroll-behavior: auto; }
    100% { scroll-behavior: auto; }
}
"""

# Browser-side snippet run after each update: scrolls every failed-tests
# textarea back to the top.
_SCROLL_RESET_JS = "() => { setTimeout(() => { document.querySelectorAll('textarea').forEach(t => { if (t.closest('.failed-tests')) { t.scrollTop = 0; setTimeout(() => { t.style.scrollBehavior = 'smooth'; t.scrollTo({ top: 0, behavior: 'smooth' }); t.style.scrollBehavior = 'auto'; }, 50); } }); }, 300); }"

# Create the Gradio interface with sidebar and dark theme
with gr.Blocks(title="Model Test Results Dashboard", css=dark_theme_css) as demo:

    with gr.Row():
        # Sidebar for model selection
        with gr.Column(scale=1, elem_classes=["sidebar"]):
            gr.Markdown("# 🤖 AI Models")
            gr.Markdown("**Select a model to analyze test results**\n\n*Interactive dashboard with detailed metrics*")

            # One button per model, in MODELS order
            model_buttons = []
            for model_name in MODELS:
                btn = gr.Button(
                    f"{model_name.lower()}",
                    variant="secondary",
                    size="lg",
                    elem_classes=["model-button"],
                )
                model_buttons.append(btn)

        # Main content area
        with gr.Column(scale=4, elem_classes=["main-content"]):
            gr.Markdown("# 📈 Test Results Dashboard")

            # Create the plot output
            plot_output = gr.Plot(
                label="",
                format="png",
                elem_classes=["plot-container"],
            )

            # Two separate failed tests displays in a row layout.
            # NOTE: the initial values are static placeholders; they are
            # replaced by the demo.load handler below once the page loads.
            with gr.Row():
                with gr.Column(scale=1):
                    amd_failed_tests_output = gr.Textbox(
                        value="Failures on AMD (exclusive):\n─────────────────────────────\nnetwork_timeout\n\nFailures on AMD (common):\n────────────────────────\ndistributed",
                        lines=8,
                        max_lines=8,
                        interactive=False,
                        container=False,
                        elem_classes=["failed-tests"],
                    )
                with gr.Column(scale=1):
                    nvidia_failed_tests_output = gr.Textbox(
                        value="Failures on NVIDIA (exclusive):\n─────────────────────────────────\nmulti_gpu\n\nFailures on NVIDIA (common):\n────────────────────────────\ndistributed",
                        lines=8,
                        max_lines=8,
                        interactive=False,
                        container=False,
                        elem_classes=["failed-tests"],
                    )

    # Set up click handlers for each button. The model name is bound as a
    # lambda default so each handler keeps its own name (no late binding).
    for model_name, button in zip(MODELS, model_buttons):
        button.click(
            fn=lambda name=model_name: plot_model_stats(name),
            outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output],
        ).then(
            fn=None,
            js=_SCROLL_RESET_JS,
        )

    # Initialize with the first model
    demo.load(
        fn=lambda: plot_model_stats(next(iter(MODELS))),
        outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output],
    )

if __name__ == "__main__":
    demo.launch()