# Spaces: aaleya-5 / Such.AI (Sleeping)
# File size: 21,123 Bytes
# c9bb632

import os
import gradio as gr
import json
import plotly
import plotly.graph_objects as go
from typing import Optional
import modal
from dotenv import load_dotenv
from mcp_server import app, MCPServer, parallel_model_query, consensus_algorithm, disagreement_analyzer, synthesize_consensus
from consensus_logic import ConsensusAnalyzer
import PyPDF2
import docx

# Load environment variables (e.g. from a local .env file) before anything
# below reads os.environ; query_models looks up NEBIUS_API_KEY there.
load_dotenv()

# Initialize components shared by all requests.
# `server.models` is the model list handed to parallel_model_query in query_models.
server = MCPServer()
# NOTE(review): consensus_analyzer is not referenced in the visible part of
# this file — confirm whether it is still needed.
consensus_analyzer = ConsensusAnalyzer()

def process_document(file_obj) -> Optional[str]:
    """Extract the plain text of an uploaded document.

    Args:
        file_obj: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the on-disk path through
            a ``name`` attribute. ``None`` means no file was uploaded.

    Returns:
        The whitespace-stripped text of a ``.pdf``, ``.txt`` or ``.docx``
        file, or ``None`` when no file was given, the extension is not
        supported, or the file could not be read. Failures are printed and
        never raised, so a bad upload does not abort the query.
    """
    if file_obj is None:
        return None

    try:
        # Path-like values are checked first: pathlib.Path also has a
        # ``name`` attribute, but it only holds the final path component.
        if isinstance(file_obj, (str, os.PathLike)):
            file_path = os.fspath(file_obj)
        else:
            file_path = file_obj.name
        file_extension = os.path.splitext(file_path)[1].lower()

        if file_extension == '.pdf':
            # Pages must be read while the underlying file is still open.
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                # A page without extractable text contributes an empty
                # string instead of failing the whole document.
                page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
            return "\n".join(page_texts).strip()

        if file_extension == '.txt':
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read().strip()

        if file_extension == '.docx':
            doc = docx.Document(file_path)
            return "\n".join(paragraph.text for paragraph in doc.paragraphs).strip()

        raise ValueError(f"Unsupported file type: {file_extension}")

    except Exception as e:
        # Deliberate best-effort: report the problem and continue without
        # document context.
        print(f"Error processing document: {str(e)}")
        return None

def create_heatmap(similarity_matrix, model_names):
    """Build a Plotly heatmap figure for a model-to-model similarity matrix.

    Args:
        similarity_matrix: Values plotted as the heatmap's ``z`` data.
        model_names: Labels used for both the x and the y axis.

    Returns:
        A 500x500 ``go.Figure`` using the Viridis colour scale with the
        colour range pinned to 0..1.
    """
    heatmap_trace = go.Heatmap(
        z=similarity_matrix,
        x=model_names,
        y=model_names,
        colorscale='Viridis',
        zmin=0,
        zmax=1,
    )
    figure = go.Figure(data=heatmap_trace)

    layout_options = {
        "title": 'Model Response Similarity Matrix',
        "xaxis_title": 'Models',
        "yaxis_title": 'Models',
        "height": 500,
        "width": 500,
    }
    figure.update_layout(**layout_options)

    return figure

def create_comparison_table(disagreement_result):
    """Turn the disagreement analysis into rows for the contradictions table.

    Args:
        disagreement_result: Mapping with a ``"disagreements"`` list. Each
            entry may provide ``cluster1`` / ``cluster2`` (model identifiers),
            ``type``, ``explanation``, ``similarity_score`` and
            ``disagreement_points``.

    Returns:
        A list of five-element rows: group 1 models, group 2 models,
        disagreement type, ``"Similarity: <score> | <explanation>"`` and the
        first two disagreement points. Returns ``[]`` when there is nothing
        to show.
    """
    if not disagreement_result or "disagreements" not in disagreement_result:
        return []

    table_rows = []
    for disagreement in disagreement_result["disagreements"] or []:
        # Show only the last path segment of each model identifier
        # (e.g. "org/model" -> "model").
        cluster1_display = [model.split('/')[-1] for model in disagreement.get("cluster1") or []]
        cluster2_display = [model.split('/')[-1] for model in disagreement.get("cluster2") or []]

        # Any real number is shown with three decimals (ints included, so a
        # score of 1 reads "1.000"); bool is excluded because it is an int
        # subclass but not a score.
        similarity_score = disagreement.get("similarity_score", "N/A")
        is_number = isinstance(similarity_score, (int, float)) and not isinstance(similarity_score, bool)
        similarity_display = f"{similarity_score:.3f}" if is_number else str(similarity_score)

        # Only the first two points are shown to keep the cell short.
        disagreement_points = disagreement.get("disagreement_points") or []
        if disagreement_points:
            points_display = "; ".join(str(point) for point in disagreement_points[:2])
        else:
            points_display = "No specific points identified"

        # Missing fields fall back to placeholders so one incomplete entry
        # does not discard the whole table.
        explanation = disagreement.get("explanation", "No explanation available")
        table_rows.append([
            ", ".join(cluster1_display),
            ", ".join(cluster2_display),
            disagreement.get("type", "Unknown"),
            f"Similarity: {similarity_display} | {explanation}",
            points_display,
        ])

    return table_rows

def _format_topics(topics: dict) -> str:
    """Format topic analysis for display."""
    if not topics:
        return "No specific topics identified."
    
    topic_lines = []
    for topic, models in topics.items():
        model_names = [model.split('/')[-1] for model in models]  # Extract model name from full path
        topic_lines.append(f"- **{topic.title()}:** {', '.join(model_names)}")
    
    return "\n".join(topic_lines)

def _format_confidence_analysis(confidence_analysis: dict) -> str:
    """Format confidence analysis for display."""
    if not confidence_analysis:
        return "No confidence analysis available."
    
    most_confident = confidence_analysis.get("most_confident_model", "Unknown")
    least_confident = confidence_analysis.get("least_confident_model", "Unknown")
    confidence_variance = confidence_analysis.get("confidence_variance", 0)
    
    # Format model names for display
    most_confident_name = most_confident.split('/')[-1] if '/' in most_confident else most_confident
    least_confident_name = least_confident.split('/')[-1] if '/' in least_confident else least_confident
    
    return f"""

- **Most Confident Model:** {most_confident_name}

- **Least Confident Model:** {least_confident_name}

- **Confidence Variance:** {confidence_variance:.3f}

"""

def _format_disagreement_summary(disagreement_summary: dict) -> str:
    """Format disagreement summary for display."""
    if not disagreement_summary:
        return "No disagreement summary available."
    
    total_disagreements = disagreement_summary.get("total_disagreements", 0)
    disagreement_types = disagreement_summary.get("disagreement_types", {})
    avg_similarity = disagreement_summary.get("avg_similarity", 0)
    
    if total_disagreements == 0:
        return "No disagreements detected - models are in strong agreement."
    
    type_lines = []
    for d_type, count in disagreement_types.items():
        type_lines.append(f"- {count} {d_type}")
    
    return f"""

- **Total Disagreements:** {total_disagreements}

- **Average Similarity:** {avg_similarity:.3f}

- **Disagreement Types:**

{chr(10).join(type_lines)}

"""

def _error_outputs(row_message: str, markdown: str, status_message: str) -> tuple:
    """Build an error result with one value per output wired to query_models."""
    return (
        [["Error", row_message, "0%", "N/A"]],      # Model responses table
        None,                                       # Plot
        markdown,                                   # Consensus markdown
        [],                                         # Comparison table
        "Error synthesizing consensus response.",   # Synthesized response
        status_message,                             # Status
    )


def _build_consensus_markdown(consensus_result, disagreement_result) -> str:
    """Assemble the consensus report shown in the "Consensus Analysis" panel."""
    disagreement_result = disagreement_result or {}

    # No consensus result means the analysis step failed before producing
    # one; "N/A" avoids presenting that as a real score of 0%.
    if consensus_result is None:
        score_text = "N/A"
    else:
        score_text = f"{(consensus_result.get('consensus_score') or 0) * 100:.1f}%"

    # Every part starts at column 0: Markdown treats lines indented by four
    # or more spaces as code, so headings must not inherit source indentation.
    sections = [
        "## Consensus Analysis",
        f"**Overall Consensus Score:** {score_text}",
        "### Enhanced Disagreement Analysis",
        disagreement_result.get('explanation') or 'No explanation available',
        "### Topic Analysis",
        _format_topics(disagreement_result.get('topics', {})),
        "### Confidence Analysis",
        _format_confidence_analysis(disagreement_result.get('confidence_analysis', {})).strip(),
        "### Disagreement Summary",
        _format_disagreement_summary(disagreement_result.get('disagreement_summary', {})).strip(),
    ]
    return "\n\n".join(str(section) for section in sections)


def query_models(question: str, file_obj: Optional[object]) -> tuple:
    """Query all models for *question* and build every value the UI displays.

    Args:
        question: The user's question. An instruction asking for a concise,
            critical answer is appended before it is sent to the models.
        file_obj: Optional uploaded document, forwarded to
            ``process_document``; ``None`` when nothing was uploaded.

    Returns:
        A six-element tuple, in the order of the click handler's outputs:
        model response rows, similarity heatmap (or ``None``), consensus
        Markdown, comparison table rows, synthesized consensus text and a
        status message. Every failure path returns the same six-element
        shape; this function does not raise.
    """
    try:
        # Get API key from environment variables
        nebius_api_key = os.environ.get("NEBIUS_API_KEY")
        if not nebius_api_key:
            return _error_outputs(
                "API Key not set",
                "## Error\nNEBIUS_API_KEY environment variable not set. Please ensure it's configured as a Modal Secret or in your local .env file.",
                "Error: API Key Missing",
            )

        # Append instruction for concise, critical answer
        instruction = " Answer under 100 words and be critical."
        question = question.strip()
        if question.endswith(('.', '?', '!')):
            question = question + instruction
        else:
            question = question + '. ' + instruction

        # Process document if provided
        document_content = process_document(file_obj)

        # Query models using Modal
        with app.run():
            try:
                # Get model responses
                result = parallel_model_query.remote(question, server.models, document_content)
                if not result or "responses" not in result:
                    raise Exception("Invalid response from model query")
                responses = result["responses"]

                # Format results for display; non-dict entries are skipped.
                model_responses = []
                for model, response in responses.items():
                    if not isinstance(response, dict):
                        continue
                    if response.get("status") == "success":
                        confidence_display = f"{(response.get('confidence') or 0) * 100:.1f}%"
                    else:
                        confidence_display = "N/A"
                    model_responses.append([
                        model,
                        response.get("response", "Error"),
                        confidence_display,
                        "N/A",  # Replaced by the disagreement explanation below
                    ])

                # Bound up front so the report can still be built when the
                # analysis below fails part-way through.
                consensus_result = None
                disagreement_result = None

                # Calculate consensus and analyze disagreements
                try:
                    consensus_result = consensus_algorithm.remote(responses)
                    disagreement_result = disagreement_analyzer.remote(responses, api_key=nebius_api_key)

                    # Every row receives the same overall explanation.
                    explanation = disagreement_result.get("explanation", "No explanation available")
                    for row in model_responses:
                        row[3] = explanation

                    # Create heatmap visualization
                    similarity_matrix = consensus_result.get("similarity_matrix", [])
                    heatmap_fig = create_heatmap(similarity_matrix, list(responses.keys()))

                    # Create comparison table
                    print(f"Disagreement Result: {disagreement_result}")
                    comparison_table = create_comparison_table(disagreement_result)

                    # Synthesize consensus response
                    synthesized_response = synthesize_consensus.remote(
                        responses,
                        disagreement_result.get("disagreements", []),
                        api_key=nebius_api_key,
                    )

                except Exception as e:
                    # Degrade gracefully: keep the raw model responses and
                    # blank out the derived views.
                    print(f"Error in consensus/disagreement analysis: {str(e)}")
                    heatmap_fig = None
                    comparison_table = []
                    synthesized_response = "Error synthesizing consensus response."

                consensus_explanation = _build_consensus_markdown(consensus_result, disagreement_result)

                return (
                    model_responses,        # Dataframe data
                    heatmap_fig,            # Plot data
                    consensus_explanation,  # Markdown
                    comparison_table,       # Comparison table
                    synthesized_response,   # Synthesized response
                    "Processing complete!"  # Status
                )

            except Exception as e:
                return _error_outputs(
                    f"Modal execution failed: {str(e)}",
                    "## Error\nAn error occurred while processing your request.",
                    f"Error: {str(e)}",
                )

    except Exception as e:
        return _error_outputs(
            f"An error occurred: {str(e)}",
            "## Error\nAn unexpected error occurred.",
            f"Error: {str(e)}",
        )

# Custom stylesheet passed to gr.Blocks(css=...): fonts, title banner, info
# panel, and styling for inputs, buttons, tables, markdown and the footer.
# Disabled declarations use CSS comment syntax (/* ... */); "#" does not
# start a comment in CSS.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Open+Sans:wght@400;700&display=swap');

body, .gradio-container, .gradio-input, .gradio-dataframe, .gradio-dataframe table, .gradio-dataframe th, .gradio-dataframe td, .gr-button, .gr-markdown, .gradio-accordion, input, textarea, select, button {
    font-family: "Open Sans", sans-serif !important;
}

/* Title styling (gradient text; white border currently disabled) */
.main-title {
    text-align: center;
    font-size: 4rem;
    font-weight: bold;
    color: #2c3e50;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    padding: 10px 0px;
    /* border: 4px solid white; */
    border-radius: 15px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    margin: 5px auto 0 auto;
    background-color: rgba(255, 255, 255, 0.9);
    backdrop-filter: blur(10px);
}

.main-title-2 {
    text-align: center;
    font-size: 1.5rem;
    font-weight: bold;
    color: black;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    padding: 20px 30px;
    /* border: 4px solid white; */
    border-radius: 15px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    margin: 20px 0;
    background-color: rgba(255, 255, 255, 0.9);
    backdrop-filter: blur(10px);
}

/* Info section styling */
.info-section {
    background-color: #171717;
    /* border: 3px solid #e2e8f0; */
    border-radius: 12px;
    padding: 25px;
    margin: 10px 0;
    margin-top: 5px !important;
    box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1);
    line-height: 1.6;
    color: #ffffff;
    font-size:1.1rem;
}

.info-section p {
    color: white;
    font-size: 1.1rem;
}

.info-section ul {
    margin-left: 20px;
}

.info-section li {
    margin-bottom: 8px;
    color: #ffffff;
}

/* Container improvements */
.gradio-container {
    border-radius: 10px;
    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
}

/* Button enhancements */
.gr-button {
    border-radius: 10px;
    transition: all 0.3s ease;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.gr-button:hover {
    background-color: var(--button-secondary-background-fill-hover);
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}

/* Input field improvements */
.gradio-input {
    border-radius: 10px;
    padding: 12px 16px;
    border: 2px solid #e2e8f0;
    transition: border-color 0.3s ease;
}

.gradio-input:focus {
    border-color: #667eea;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
}

/* Dataframe styling */
.gradio-dataframe {
    border-radius: 12px;
    overflow: hidden;
    box-shadow: 0 4px 16px rgba(0,0,0,0.1);
}

.gradio-dataframe table {
    border-collapse: separate;
    border-spacing: 0;
}

.gradio-dataframe th, .gradio-dataframe td {
    padding: 15px 18px;
    border-bottom: 1px solid var(--border-color-primary);
}

.gradio-dataframe th {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    font-weight: bold;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.gradio-dataframe tr:nth-child(even) {
    background-color: rgba(102, 126, 234, 0.05);
}

.gradio-dataframe tr:last-child td {
    border-bottom: none;
}

/* Markdown improvements */
.gr-markdown {
    padding: 20px;
    border-radius: 12px;
    background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
    border: 1px solid #cbd5e0;
    box-shadow: 0 2px 8px rgba(0,0,0,0.05);
}

/* Accordion styling */
.gradio-accordion {
    margin-bottom: 15px;
    border-radius: 10px;
    overflow: hidden;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Status indicator */
.status-indicator {
    background: linear-gradient(45deg, #4CAF50, #45a049);
    color: white;
    padding: 10px 20px;
    border-radius: 25px;
    font-weight: bold;
    text-align: center;
    box-shadow: 0 2px 10px rgba(76, 175, 80, 0.3);
}

#footer {
    position: sticky;
    bottom: 0;
    width: 100%;
    background-color: #111;
    color: #ccc;
    text-align: center;
    font-size: 0.85rem;
    padding: 10px 20px;
    z-index: 9999;
    box-shadow: 0 -2px 10px rgba(0, 0, 0, 0.3);
}

/* Remove default top margin between Gradio blocks */
div.svelte-1ipelgc {
    margin-top: 0px !important;
}

/* Remove extra vertical space between top-level blocks */
.gradio-container > div {
    margin-top: 0px !important;
    padding-top: 0px !important;
}
"""

# Declarative UI definition. Components are created in the order they appear
# here; the resulting Blocks app is bound to `demo` and launched at the bottom
# of the file.
with gr.Blocks(
    title="Multi-Model Consensus Builder",
    theme=gr.themes.Monochrome(),
    analytics_enabled=False,
    css=CUSTOM_CSS
) as demo:
    # Title banner (styled by the .main-title rule in CUSTOM_CSS)
    gr.HTML("""

        <div class="main-title">

            🤖 SUCH.AI

        </div>

    """)
    
    # Introductory description (styled by the .info-section rules in CUSTOM_CSS)
    gr.HTML("""

        <div class="info-section">

            <p><strong>What is this?</strong> Such.AI lets you query multiple top-tier AI models (Llama, Gemma, Qwen, and more) at once, then analyzes and visualizes their agreement, confidence, and disagreements. It lets the user submit a claim for multi-model fact-checking, disagreement detection, and risk estimation. It synthesizes a concise, critical consensus answer, and highlights where the models diverge—ignoring trivial formatting differences. You can also upload documents (PDF, DOCX, TXT) for context.</p>

            

            <p><strong>Why use it?</strong></p>

            <ul>

                <li>See where the smartest AIs agree (and where they don't)</li>

                <li>Get a critical, concise consensus answer</li>

                <li>Visualize model agreement with heatmaps and tables</li>

                <li>Upload documents for context</li>

                <li>Perfect for research, fact-checking, and critical thinking</li>

            </ul>

        </div>

    """)
    
    # Input row: question + optional document on the left, status on the right
    with gr.Row():
        with gr.Column():
            question = gr.Textbox(
                label="Your Question",
                placeholder="Ask anything...",
                lines=3
            )
            # Accepted extensions mirror the formats handled by process_document
            file_input = gr.File(
                label="Upload Document (optional)",
                file_types=[".pdf", ".txt", ".docx"]
            )
            submit_btn = gr.Button("Get Consensus Analysis", variant="primary")
        
        with gr.Column():
            # Receives the last element of query_models' return value
            status = gr.HTML(label="Live Processing Status")
    
    # Per-model answers; each row is [model, response, confidence, reasoning]
    with gr.Row():
        responses_df = gr.Dataframe(
            label="Model Responses",
            headers=["Model", "Response", "Confidence", "Reasoning"],
            wrap=True
        )
    
    # Similarity heatmap next to the consensus report
    with gr.Row():
        consensus_plot = gr.Plot(label="Consensus Visualization")
        # Use collapsible for long markdown
        with gr.Accordion("Consensus Analysis", open=True):
            consensus_explanation = gr.Markdown(label="Final Consensus & Explanation")
    
    # Rows produced by create_comparison_table (collapsed by default)
    with gr.Row():
        with gr.Accordion("Model Contradictions Analysis", open=False):
            comparison_table = gr.Dataframe(
                # label="Model Contradictions Analysis",
                headers=["Models Group 1", "Models Group 2", "Type of Disagreement", "Explanation", "Disagreement Points"],
                wrap=True
            )
    
    # Synthesized consensus answer (collapsed by default)
    with gr.Row():
        with gr.Accordion("Synthesized Consensus Response", open=False):
            synthesized_output = gr.Markdown(label="Synthesized Consensus Response")

    # Footer credit; elem_id="footer" ties it to the #footer rule in CUSTOM_CSS
    with gr.Row():
        gr.Markdown(
        """

        <div style='width: 100%; text-align: center; padding: 20px; font-size: 1.1rem; color: #ccc;'>

            Built with 💜 by <strong>Team Such.AI</strong><br>

            for the <em>UNESCO Youth Hackathon 2025</em>

        </div>

        """,
        elem_id="footer"
    )

    
    # Set up event handlers.
    # query_models must return one value per entry in `outputs`, in this order.
    submit_btn.click(
        fn=query_models,
        inputs=[question, file_input],
        outputs=[responses_df, consensus_plot, consensus_explanation, comparison_table, synthesized_output, status],
        show_progress=True  # Show progress bar during processing
    )

# Script entry point: start the Gradio server only when run directly.
if __name__ == "__main__":
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,    # Show detailed error messages
        "favicon_path": None,  # Disable favicon to prevent 404
    }
    # Enable queuing for better handling of concurrent requests
    demo.queue()
    demo.launch(**launch_options)