import os import gradio as gr import json import plotly import plotly.graph_objects as go from typing import Optional import modal from dotenv import load_dotenv from mcp_server import app, MCPServer, parallel_model_query, consensus_algorithm, disagreement_analyzer, synthesize_consensus from consensus_logic import ConsensusAnalyzer import PyPDF2 import docx # Load environment variables load_dotenv() # Initialize components server = MCPServer() consensus_analyzer = ConsensusAnalyzer() def process_document(file_obj) -> Optional[str]: """Process uploaded document and return its content.""" if file_obj is None: return None try: file_path = file_obj.name file_extension = os.path.splitext(file_path)[1].lower() if file_extension == '.pdf': # Process PDF file with open(file_path, 'rb') as file: pdf_reader = PyPDF2.PdfReader(file) text = "" for page in pdf_reader.pages: text += page.extract_text() + "\n" return text.strip() elif file_extension == '.txt': # Process text file with open(file_path, 'r', encoding='utf-8') as file: return file.read().strip() elif file_extension == '.docx': # Process Word document doc = docx.Document(file_path) text = "" for paragraph in doc.paragraphs: text += paragraph.text + "\n" return text.strip() else: raise ValueError(f"Unsupported file type: {file_extension}") except Exception as e: print(f"Error processing document: {str(e)}") return None def create_heatmap(similarity_matrix, model_names): """Create a Plotly heatmap from similarity matrix.""" fig = go.Figure(data=go.Heatmap( z=similarity_matrix, x=model_names, y=model_names, colorscale='Viridis', zmin=0, zmax=1 )) fig.update_layout( title='Model Response Similarity Matrix', xaxis_title='Models', yaxis_title='Models', height=500, width=500 ) return fig def create_comparison_table(disagreement_result): """Create a comparison table showing model contradictions with enhanced details.""" if not disagreement_result or "disagreements" not in disagreement_result: return [] table_rows = [] for disagreement in disagreement_result["disagreements"]: cluster1_models = ", ".join(disagreement["cluster1"]) cluster2_models = ", ".join(disagreement["cluster2"]) # Format model names for display cluster1_display = [model.split('/')[-1] for model in disagreement["cluster1"]] cluster2_display = [model.split('/')[-1] for model in disagreement["cluster2"]] # Get similarity score if available similarity_score = disagreement.get("similarity_score", "N/A") if isinstance(similarity_score, float): similarity_display = f"{similarity_score:.3f}" else: similarity_display = str(similarity_score) # Get disagreement points if available disagreement_points = disagreement.get("disagreement_points", []) if disagreement_points: points_display = "; ".join(disagreement_points[:2]) # Show first 2 points else: points_display = "No specific points identified" table_rows.append([ ", ".join(cluster1_display), ", ".join(cluster2_display), disagreement["type"], f"Similarity: {similarity_display} | {disagreement['explanation']}", points_display ]) return table_rows def _format_topics(topics: dict) -> str: """Format topic analysis for display.""" if not topics: return "No specific topics identified." topic_lines = [] for topic, models in topics.items(): model_names = [model.split('/')[-1] for model in models] # Extract model name from full path topic_lines.append(f"- **{topic.title()}:** {', '.join(model_names)}") return "\n".join(topic_lines) def _format_confidence_analysis(confidence_analysis: dict) -> str: """Format confidence analysis for display.""" if not confidence_analysis: return "No confidence analysis available." most_confident = confidence_analysis.get("most_confident_model", "Unknown") least_confident = confidence_analysis.get("least_confident_model", "Unknown") confidence_variance = confidence_analysis.get("confidence_variance", 0) # Format model names for display most_confident_name = most_confident.split('/')[-1] if '/' in most_confident else most_confident least_confident_name = least_confident.split('/')[-1] if '/' in least_confident else least_confident return f""" - **Most Confident Model:** {most_confident_name} - **Least Confident Model:** {least_confident_name} - **Confidence Variance:** {confidence_variance:.3f} """ def _format_disagreement_summary(disagreement_summary: dict) -> str: """Format disagreement summary for display.""" if not disagreement_summary: return "No disagreement summary available." total_disagreements = disagreement_summary.get("total_disagreements", 0) disagreement_types = disagreement_summary.get("disagreement_types", {}) avg_similarity = disagreement_summary.get("avg_similarity", 0) if total_disagreements == 0: return "No disagreements detected - models are in strong agreement." type_lines = [] for d_type, count in disagreement_types.items(): type_lines.append(f"- {count} {d_type}") return f""" - **Total Disagreements:** {total_disagreements} - **Average Similarity:** {avg_similarity:.3f} - **Disagreement Types:** {chr(10).join(type_lines)} """ def query_models(question: str, file_obj: Optional[gr.File]) -> tuple: """Process the query and return results.""" try: # Get API key from environment variables nebius_api_key = os.environ.get("NEBIUS_API_KEY") if not nebius_api_key: return ( [["Error", "API Key not set", "0%", "N/A"]], None, "## Error\nNEBIUS_API_KEY environment variable not set. Please ensure it's configured as a Modal Secret or in your local .env file.", [], "Error: API Key Missing" ) # Append instruction for concise, critical answer instruction = " Answer under 100 words and be critical." if question.strip().endswith(('.', '?', '!')): question = question.strip() + instruction else: question = question.strip() + '. ' + instruction # Process document if provided document_content = process_document(file_obj) # Query models using Modal with app.run() as app_instance: try: # Get model responses result = parallel_model_query.remote(question, server.models, document_content) if not result or "responses" not in result: raise Exception("Invalid response from model query") # Format results for display model_responses = [] for model, response in result["responses"].items(): if isinstance(response, dict): model_responses.append([ model, response.get("response", "Error"), f"{response.get('confidence', 0)*100:.1f}%" if response.get("status") == "success" else "N/A", "N/A" # Disagreement explanation will be updated later ]) # Calculate consensus and analyze disagreements try: consensus_result = consensus_algorithm.remote(result["responses"]) disagreement_result = disagreement_analyzer.remote(result["responses"], api_key=nebius_api_key) # Update model responses with disagreement info for i, (model, _) in enumerate(result["responses"].items()): if i < len(model_responses): model_responses[i][3] = disagreement_result.get("explanation", "No explanation available") # Create heatmap visualization similarity_matrix = consensus_result.get("similarity_matrix", []) model_names = list(result["responses"].keys()) heatmap_fig = create_heatmap(similarity_matrix, model_names) # Create comparison table print(f"Disagreement Result: {disagreement_result}") comparison_table = create_comparison_table(disagreement_result) # Synthesize consensus response synthesized_response = synthesize_consensus.remote(result["responses"], disagreement_result.get("disagreements", []), api_key=nebius_api_key) except Exception as e: print(f"Error in consensus/disagreement analysis: {str(e)}") heatmap_fig = None comparison_table = [] synthesized_response = "Error synthesizing consensus response." # Create final consensus explanation consensus_explanation = f""" ## Consensus Analysis **Overall Consensus Score:** {consensus_result.get('consensus_score', 0)*100:.1f}% ### Enhanced Disagreement Analysis {disagreement_result.get('explanation', 'No explanation available')} ### Topic Analysis {_format_topics(disagreement_result.get('topics', {}))} ### Confidence Analysis {_format_confidence_analysis(disagreement_result.get('confidence_analysis', {}))} ### Disagreement Summary {_format_disagreement_summary(disagreement_result.get('disagreement_summary', {}))} """ return ( model_responses, # Dataframe data heatmap_fig, # Plot data consensus_explanation, # Markdown comparison_table, # Comparison table synthesized_response, # Synthesized response "Processing complete!" # Status ) except Exception as e: return ( [["Error", f"Modal execution failed: {str(e)}", "0%", "N/A"]], None, "## Error\nAn error occurred while processing your request.", [], "Error synthesizing consensus response.", f"Error: {str(e)}" ) except Exception as e: return ( [["Error", f"An error occurred: {str(e)}", "0%", "N/A"]], None, "## Error\nAn unexpected error occurred.", [], "Error synthesizing consensus response.", f"Error: {str(e)}" ) # Enhanced CSS with title styling and improved layout CUSTOM_CSS = """ @import url('https://fonts.googleapis.com/css2?family=Open+Sans:wght@400;700&display=swap'); body, .gradio-container, .gradio-input, .gradio-dataframe, .gradio-dataframe table, .gradio-dataframe th, .gradio-dataframe td, .gr-button, .gr-markdown, .gradio-accordion, input, textarea, select, button { font-family: "Open Sans", sans-serif !important; } body, .gradio-container, .gradio-input, .gradio-dataframe, .gradio-dataframe table, .gradio-dataframe th, .gradio-dataframe td, .gr-button, .gr-markdown, .gradio-accordion, input, textarea, select, button { font-family: "Open Sans", sans-serif !important; } /* Enhanced title styling with white border */ .main-title { text-align: center; font-size: 4rem; font-weight: bold; color: #2c3e50; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; padding: 10px 0px; # border: 4px solid white; border-radius: 15px; box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1); margin: 5px auto 0 auto; background-color: rgba(255, 255, 255, 0.9); backdrop-filter: blur(10px); } .main-title-2 { text-align: center; font-size: 1.5rem; font-weight: bold; color: black; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; padding: 20px 30px; # border: 4px solid white; border-radius: 15px; box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1); margin: 20px 0; background-color: rgba(255, 255, 255, 0.9); backdrop-filter: blur(10px); } /* Info section styling */ .info-section { background-color: #171717; # border: 3px solid #e2e8f0; border-radius: 12px; padding: 25px; margin: 10px 0; margin-top: 5px !important; box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1); line-height: 1.6; color: #ffffff; font-size:1.1rem; } .info-section p { color: white; font-size: 1.1rem; } .info-section ul { margin-left: 20px; } .info-section li { margin-bottom: 8px; color: #ffffff; } /* Container improvements */ .gradio-container { border-radius: 10px; box-shadow: 0 4px 8px rgba(0,0,0,0.1); } /* Button enhancements */ .gr-button { border-radius: 10px; transition: all 0.3s ease; font-weight: 600; text-transform: uppercase; letter-spacing: 0.5px; } .gr-button:hover { background-color: var(--button-secondary-background-fill-hover); transform: translateY(-2px); box-shadow: 0 4px 12px rgba(0,0,0,0.15); } /* Input field improvements */ .gradio-input { border-radius: 10px; padding: 12px 16px; border: 2px solid #e2e8f0; transition: border-color 0.3s ease; } .gradio-input:focus { border-color: #667eea; box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1); } /* Dataframe styling */ .gradio-dataframe { border-radius: 12px; overflow: hidden; box-shadow: 0 4px 16px rgba(0,0,0,0.1); } .gradio-dataframe table { border-collapse: separate; border-spacing: 0; } .gradio-dataframe th, .gradio-dataframe td { padding: 15px 18px; border-bottom: 1px solid var(--border-color-primary); } .gradio-dataframe th { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; font-weight: bold; text-transform: uppercase; letter-spacing: 0.5px; } .gradio-dataframe tr:nth-child(even) { background-color: rgba(102, 126, 234, 0.05); } .gradio-dataframe tr:last-child td { border-bottom: none; } /* Markdown improvements */ .gr-markdown { padding: 20px; border-radius: 12px; background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%); border: 1px solid #cbd5e0; box-shadow: 0 2px 8px rgba(0,0,0,0.05); } /* Accordion styling */ .gradio-accordion { margin-bottom: 15px; border-radius: 10px; overflow: hidden; box-shadow: 0 2px 8px rgba(0,0,0,0.1); } /* Status indicator */ .status-indicator { background: linear-gradient(45deg, #4CAF50, #45a049); color: white; padding: 10px 20px; border-radius: 25px; font-weight: bold; text-align: center; box-shadow: 0 2px 10px rgba(76, 175, 80, 0.3); } #footer { position: sticky; bottom: 0; width: 100%; background-color: #111; color: #ccc; text-align: center; font-size: 0.85rem; padding: 10px 20px; z-index: 9999; box-shadow: 0 -2px 10px rgba(0, 0, 0, 0.3); } /* Remove default top margin between Gradio blocks */ div.svelte-1ipelgc { margin-top: 0px !important; } /* Remove extra vertical space between top-level blocks */ .gradio-container > div { margin-top: 0px !important; padding-top: 0px !important; } """ with gr.Blocks( title="Multi-Model Consensus Builder", theme=gr.themes.Monochrome(), analytics_enabled=False, css=CUSTOM_CSS ) as demo: # Enhanced title with white border gr.HTML("""
What is this? Such.AI lets you query multiple top-tier AI models (Llama, Gemma, Qwen, and more) at once, then analyzes and visualizes their agreement, confidence, and disagreements. It lets the user submit a claim for multi-model fact-checking, disagreement detection, and risk estimation. It synthesizes a concise, critical consensus answer, and highlights where the models diverge—ignoring trivial formatting differences. You can also upload documents (PDF, DOCX, TXT) for context.
Why use it?