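"""Such.AI: Gradio front end for the multi-model consensus builder.

Queries several models in parallel via Modal, scores their agreement,
renders a similarity heatmap and contradiction table, and synthesizes a
concise, critical consensus answer. PDF, DOCX, and TXT uploads can be
attached as extra context.
"""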
import os
from typing import Optional

import gradio as gr
import plotly.graph_objects as go
import PyPDF2
import docx
from dotenv import load_dotenv

from mcp_server import app, MCPServer, parallel_model_query, consensus_algorithm, disagreement_analyzer, synthesize_consensus
from consensus_logic import ConsensusAnalyzer
# Load environment variables
load_dotenv()
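# NEBIUS_API_KEY is read from the environment at query time (configure it as
# a Modal Secret in deployment, or in a local .env file).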
# Initialize components
server = MCPServer()
consensus_analyzer = ConsensusAnalyzer()

def process_document(file_obj) -> Optional[str]:
    """Process an uploaded document and return its text content."""
    if file_obj is None:
        return None
    try:
        file_path = file_obj.name
        file_extension = os.path.splitext(file_path)[1].lower()
        if file_extension == '.pdf':
            # Process PDF file page by page
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                text = ""
                for page in pdf_reader.pages:
                    # extract_text() can come back empty for image-only pages
                    text += (page.extract_text() or "") + "\n"
                return text.strip()
        elif file_extension == '.txt':
            # Process plain-text file
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read().strip()
        elif file_extension == '.docx':
            # Process Word document paragraph by paragraph
            doc = docx.Document(file_path)
            text = "\n".join(paragraph.text for paragraph in doc.paragraphs)
            return text.strip()
        else:
            raise ValueError(f"Unsupported file type: {file_extension}")
    except Exception as e:
        print(f"Error processing document: {str(e)}")
        return None

def create_heatmap(similarity_matrix, model_names):
    """Create a Plotly heatmap from a pairwise similarity matrix."""
    fig = go.Figure(data=go.Heatmap(
        z=similarity_matrix,
        x=model_names,
        y=model_names,
        colorscale='Viridis',
        zmin=0,
        zmax=1
    ))
    fig.update_layout(
        title='Model Response Similarity Matrix',
        xaxis_title='Models',
        yaxis_title='Models',
        height=500,
        width=500
    )
    return fig

def create_comparison_table(disagreement_result):
    """Create a comparison table showing model contradictions with enhanced details."""
    if not disagreement_result or "disagreements" not in disagreement_result:
        return []
    table_rows = []
    for disagreement in disagreement_result["disagreements"]:
        # Format model names for display (keep only the part after the provider prefix)
        cluster1_display = [model.split('/')[-1] for model in disagreement["cluster1"]]
        cluster2_display = [model.split('/')[-1] for model in disagreement["cluster2"]]
        # Get the similarity score if available
        similarity_score = disagreement.get("similarity_score", "N/A")
        if isinstance(similarity_score, float):
            similarity_display = f"{similarity_score:.3f}"
        else:
            similarity_display = str(similarity_score)
        # Get disagreement points if available
        disagreement_points = disagreement.get("disagreement_points", [])
        if disagreement_points:
            points_display = "; ".join(disagreement_points[:2])  # Show the first two points
        else:
            points_display = "No specific points identified"
        table_rows.append([
            ", ".join(cluster1_display),
            ", ".join(cluster2_display),
            disagreement["type"],
            f"Similarity: {similarity_display} | {disagreement['explanation']}",
            points_display
        ])
    return table_rows

def _format_topics(topics: dict) -> str:
    """Format topic analysis for display."""
    if not topics:
        return "No specific topics identified."
    topic_lines = []
    for topic, models in topics.items():
        model_names = [model.split('/')[-1] for model in models]  # Extract model name from full path
        topic_lines.append(f"- **{topic.title()}:** {', '.join(model_names)}")
    return "\n".join(topic_lines)

def _format_confidence_analysis(confidence_analysis: dict) -> str:
    """Format confidence analysis for display."""
    if not confidence_analysis:
        return "No confidence analysis available."
    most_confident = confidence_analysis.get("most_confident_model", "Unknown")
    least_confident = confidence_analysis.get("least_confident_model", "Unknown")
    confidence_variance = confidence_analysis.get("confidence_variance", 0)
    # Format model names for display (split('/')[-1] is a no-op when there is no '/')
    most_confident_name = most_confident.split('/')[-1]
    least_confident_name = least_confident.split('/')[-1]
    return f"""
- **Most Confident Model:** {most_confident_name}
- **Least Confident Model:** {least_confident_name}
- **Confidence Variance:** {confidence_variance:.3f}
"""

def _format_disagreement_summary(disagreement_summary: dict) -> str:
    """Format disagreement summary for display."""
    if not disagreement_summary:
        return "No disagreement summary available."
    total_disagreements = disagreement_summary.get("total_disagreements", 0)
    disagreement_types = disagreement_summary.get("disagreement_types", {})
    avg_similarity = disagreement_summary.get("avg_similarity", 0)
    if total_disagreements == 0:
        return "No disagreements detected - models are in strong agreement."
    type_lines = "\n".join(f"- {count} {d_type}" for d_type, count in disagreement_types.items())
    return f"""
- **Total Disagreements:** {total_disagreements}
- **Average Similarity:** {avg_similarity:.3f}
- **Disagreement Types:**
{type_lines}
"""

def query_models(question: str, file_obj: Optional[gr.File]) -> tuple:
    """Process the query and return results.

    The six return values must match the order of `outputs` wired to
    submit_btn.click below: responses dataframe, heatmap plot, consensus
    markdown, comparison table, synthesized response, and status.
    """
    try:
        # Get the API key from the environment
        nebius_api_key = os.environ.get("NEBIUS_API_KEY")
        if not nebius_api_key:
            return (
                [["Error", "API Key not set", "0%", "N/A"]],
                None,
                "## Error\nNEBIUS_API_KEY environment variable not set. Please ensure it's configured as a Modal Secret or in your local .env file.",
                [],
                "Error: API key missing.",
                "Error: API Key Missing"
            )
        # Append an instruction for a concise, critical answer
        instruction = "Answer under 100 words and be critical."
        question = question.strip()
        if question.endswith(('.', '?', '!')):
            question = f"{question} {instruction}"
        else:
            question = f"{question}. {instruction}"
        # Process the document if provided
        document_content = process_document(file_obj)
        # Query models using Modal
        with app.run():
            try:
                # Get model responses
                result = parallel_model_query.remote(question, server.models, document_content)
                if not result or "responses" not in result:
                    raise Exception("Invalid response from model query")
                # Format results for display
                model_responses = []
                for model, response in result["responses"].items():
                    if isinstance(response, dict):
                        model_responses.append([
                            model,
                            response.get("response", "Error"),
                            f"{response.get('confidence', 0) * 100:.1f}%" if response.get("status") == "success" else "N/A",
                            "N/A"  # Disagreement explanation is filled in below
                        ])
                # Calculate consensus and analyze disagreements
                try:
                    consensus_result = consensus_algorithm.remote(result["responses"])
                    disagreement_result = disagreement_analyzer.remote(result["responses"], api_key=nebius_api_key)
                    # Update model responses with the disagreement explanation
                    explanation = disagreement_result.get("explanation", "No explanation available")
                    for row in model_responses:
                        row[3] = explanation
                    # Create the heatmap visualization
                    similarity_matrix = consensus_result.get("similarity_matrix", [])
                    model_names = list(result["responses"].keys())
                    heatmap_fig = create_heatmap(similarity_matrix, model_names)
                    # Create the comparison table
                    comparison_table = create_comparison_table(disagreement_result)
                    # Synthesize the consensus response
                    synthesized_response = synthesize_consensus.remote(
                        result["responses"],
                        disagreement_result.get("disagreements", []),
                        api_key=nebius_api_key
                    )
                except Exception as e:
                    print(f"Error in consensus/disagreement analysis: {str(e)}")
                    # Fall back to empty results so the explanation below still renders
                    consensus_result = {}
                    disagreement_result = {}
                    heatmap_fig = None
                    comparison_table = []
                    synthesized_response = "Error synthesizing consensus response."
                # Create the final consensus explanation
                consensus_explanation = f"""
## Consensus Analysis

**Overall Consensus Score:** {consensus_result.get('consensus_score', 0) * 100:.1f}%

### Enhanced Disagreement Analysis
{disagreement_result.get('explanation', 'No explanation available')}

### Topic Analysis
{_format_topics(disagreement_result.get('topics', {}))}

### Confidence Analysis
{_format_confidence_analysis(disagreement_result.get('confidence_analysis', {}))}

### Disagreement Summary
{_format_disagreement_summary(disagreement_result.get('disagreement_summary', {}))}
"""
                return (
                    model_responses,         # Dataframe data
                    heatmap_fig,             # Plot data
                    consensus_explanation,   # Markdown
                    comparison_table,        # Comparison table
                    synthesized_response,    # Synthesized response
                    "Processing complete!"   # Status
                )
            except Exception as e:
                return (
                    [["Error", f"Modal execution failed: {str(e)}", "0%", "N/A"]],
                    None,
                    "## Error\nAn error occurred while processing your request.",
                    [],
                    "Error synthesizing consensus response.",
                    f"Error: {str(e)}"
                )
    except Exception as e:
        return (
            [["Error", f"An error occurred: {str(e)}", "0%", "N/A"]],
            None,
            "## Error\nAn unexpected error occurred.",
            [],
            "Error synthesizing consensus response.",
            f"Error: {str(e)}"
        )

# Enhanced CSS with title styling and improved layout
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Open+Sans:wght@400;700&display=swap');
body, .gradio-container, .gradio-input, .gradio-dataframe, .gradio-dataframe table, .gradio-dataframe th, .gradio-dataframe td, .gr-button, .gr-markdown, .gradio-accordion, input, textarea, select, button {
font-family: "Open Sans", sans-serif !important;
}
body, .gradio-container, .gradio-input, .gradio-dataframe, .gradio-dataframe table, .gradio-dataframe th, .gradio-dataframe td, .gr-button, .gr-markdown, .gradio-accordion, input, textarea, select, button {
font-family: "Open Sans", sans-serif !important;
}
/* Enhanced title styling with white border */
.main-title {
text-align: center;
font-size: 4rem;
font-weight: bold;
color: #2c3e50;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
padding: 10px 0px;
# border: 4px solid white;
border-radius: 15px;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
margin: 5px auto 0 auto;
background-color: rgba(255, 255, 255, 0.9);
backdrop-filter: blur(10px);
}
.main-title-2 {
text-align: center;
font-size: 1.5rem;
font-weight: bold;
color: black;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
padding: 20px 30px;
# border: 4px solid white;
border-radius: 15px;
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
margin: 20px 0;
background-color: rgba(255, 255, 255, 0.9);
backdrop-filter: blur(10px);
}
/* Info section styling */
.info-section {
background-color: #171717;
# border: 3px solid #e2e8f0;
border-radius: 12px;
padding: 25px;
margin: 10px 0;
margin-top: 5px !important;
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1);
line-height: 1.6;
color: #ffffff;
font-size:1.1rem;
}
.info-section p {
color: white;
font-size: 1.1rem;
}
.info-section ul {
margin-left: 20px;
}
.info-section li {
margin-bottom: 8px;
color: #ffffff;
}
/* Container improvements */
.gradio-container {
border-radius: 10px;
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
}
/* Button enhancements */
.gr-button {
border-radius: 10px;
transition: all 0.3s ease;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.gr-button:hover {
background-color: var(--button-secondary-background-fill-hover);
transform: translateY(-2px);
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}
/* Input field improvements */
.gradio-input {
border-radius: 10px;
padding: 12px 16px;
border: 2px solid #e2e8f0;
transition: border-color 0.3s ease;
}
.gradio-input:focus {
border-color: #667eea;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
}
/* Dataframe styling */
.gradio-dataframe {
border-radius: 12px;
overflow: hidden;
box-shadow: 0 4px 16px rgba(0,0,0,0.1);
}
.gradio-dataframe table {
border-collapse: separate;
border-spacing: 0;
}
.gradio-dataframe th, .gradio-dataframe td {
padding: 15px 18px;
border-bottom: 1px solid var(--border-color-primary);
}
.gradio-dataframe th {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
color: white;
font-weight: bold;
text-transform: uppercase;
letter-spacing: 0.5px;
}
.gradio-dataframe tr:nth-child(even) {
background-color: rgba(102, 126, 234, 0.05);
}
.gradio-dataframe tr:last-child td {
border-bottom: none;
}
/* Markdown improvements */
.gr-markdown {
padding: 20px;
border-radius: 12px;
background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
border: 1px solid #cbd5e0;
box-shadow: 0 2px 8px rgba(0,0,0,0.05);
}
/* Accordion styling */
.gradio-accordion {
margin-bottom: 15px;
border-radius: 10px;
overflow: hidden;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}
/* Status indicator */
.status-indicator {
background: linear-gradient(45deg, #4CAF50, #45a049);
color: white;
padding: 10px 20px;
border-radius: 25px;
font-weight: bold;
text-align: center;
box-shadow: 0 2px 10px rgba(76, 175, 80, 0.3);
}
#footer {
position: sticky;
bottom: 0;
width: 100%;
background-color: #111;
color: #ccc;
text-align: center;
font-size: 0.85rem;
padding: 10px 20px;
z-index: 9999;
box-shadow: 0 -2px 10px rgba(0, 0, 0, 0.3);
}
/* Remove default top margin between Gradio blocks */
div.svelte-1ipelgc {
margin-top: 0px !important;
}
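/* Note: svelte-* class names are generated by the Gradio build and may
   change between releases, so the selector above can need updating after
   an upgrade. */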
/* Remove extra vertical space between top-level blocks */
.gradio-container > div {
    margin-top: 0px !important;
    padding-top: 0px !important;
}
"""

with gr.Blocks(
    title="Multi-Model Consensus Builder",
    theme=gr.themes.Monochrome(),
    analytics_enabled=False,
    css=CUSTOM_CSS
) as demo:
    # App title
    gr.HTML("""
    <div class="main-title">
        🤖 SUCH.AI
    </div>
    """)
    # Info section
    gr.HTML("""
    <div class="info-section">
        <p><strong>What is this?</strong> Such.AI queries multiple top-tier AI models (Llama, Gemma, Qwen, and more) at once, then analyzes and visualizes their agreement, confidence, and disagreements. Submit a claim for multi-model fact-checking, disagreement detection, and risk estimation: Such.AI synthesizes a concise, critical consensus answer and highlights where the models diverge, ignoring trivial formatting differences. You can also upload documents (PDF, DOCX, TXT) for context.</p>
        <p><strong>Why use it?</strong></p>
        <ul>
            <li>See where the smartest AIs agree (and where they don't)</li>
            <li>Get a critical, concise consensus answer</li>
            <li>Visualize model agreement with heatmaps and tables</li>
            <li>Upload documents for context</li>
            <li>Perfect for research, fact-checking, and critical thinking</li>
        </ul>
    </div>
    """)
    with gr.Row():
        with gr.Column():
            question = gr.Textbox(
                label="Your Question",
                placeholder="Ask anything...",
                lines=3
            )
            file_input = gr.File(
                label="Upload Document (optional)",
                file_types=[".pdf", ".txt", ".docx"]
            )
            submit_btn = gr.Button("Get Consensus Analysis", variant="primary")
        with gr.Column():
            status = gr.HTML(label="Live Processing Status")
    with gr.Row():
        responses_df = gr.Dataframe(
            label="Model Responses",
            headers=["Model", "Response", "Confidence", "Disagreement Explanation"],
            wrap=True
        )
    with gr.Row():
        consensus_plot = gr.Plot(label="Consensus Visualization")
    # Use a collapsible accordion for the long markdown report
    with gr.Accordion("Consensus Analysis", open=True):
        consensus_explanation = gr.Markdown(label="Final Consensus & Explanation")
    with gr.Row():
        with gr.Accordion("Model Contradictions Analysis", open=False):
            comparison_table = gr.Dataframe(
                headers=["Models Group 1", "Models Group 2", "Type of Disagreement", "Explanation", "Disagreement Points"],
                wrap=True
            )
    with gr.Row():
        with gr.Accordion("Synthesized Consensus Response", open=False):
            synthesized_output = gr.Markdown(label="Synthesized Consensus Response")
    with gr.Row():
        gr.Markdown(
            """
            <div style='width: 100%; text-align: center; padding: 20px; font-size: 1.1rem; color: #ccc;'>
                Built with 💜 by <strong>Team Such.AI</strong><br>
                for the <em>UNESCO Youth Hackathon 2025</em>
            </div>
            """,
            elem_id="footer"
        )
    # Set up event handlers
    submit_btn.click(
        fn=query_models,
        inputs=[question, file_input],
        outputs=[responses_df, consensus_plot, consensus_explanation, comparison_table, synthesized_output, status],
        show_progress=True  # Show a progress bar during processing
    )
# Launch the app
if __name__ == "__main__":
    demo.queue()  # Enable queuing for better handling of concurrent requests
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,   # Show detailed error messages
        favicon_path=None  # No favicon, avoiding a 404 request
    )