import os
import json
from typing import Optional

import gradio as gr
import plotly
import plotly.graph_objects as go
import modal
from dotenv import load_dotenv
import PyPDF2
import docx

from mcp_server import app, MCPServer, parallel_model_query, consensus_algorithm, disagreement_analyzer, synthesize_consensus
from consensus_logic import ConsensusAnalyzer
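
# NEBIUS_API_KEY is read from the environment at query time. load_dotenv() makes a
# local .env file work for development; on Modal the key is expected to come from a
# Secret instead (see the error message in query_models below).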
load_dotenv()

server = MCPServer()
consensus_analyzer = ConsensusAnalyzer()


def process_document(file_obj) -> Optional[str]:
    """Process uploaded document and return its content."""
    if file_obj is None:
        return None

    try:
        file_path = file_obj.name
        file_extension = os.path.splitext(file_path)[1].lower()

        if file_extension == '.pdf':
            with open(file_path, 'rb') as file:
                pdf_reader = PyPDF2.PdfReader(file)
                text = ""
                for page in pdf_reader.pages:
                    text += page.extract_text() + "\n"
                return text.strip()

        elif file_extension == '.txt':
            with open(file_path, 'r', encoding='utf-8') as file:
                return file.read().strip()

        elif file_extension == '.docx':
            doc = docx.Document(file_path)
            text = ""
            for paragraph in doc.paragraphs:
                text += paragraph.text + "\n"
            return text.strip()

        else:
            raise ValueError(f"Unsupported file type: {file_extension}")

    except Exception as e:
        print(f"Error processing document: {str(e)}")
        return None
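

# The similarity matrix returned by consensus_algorithm is assumed to be a square
# matrix of pairwise response-similarity scores in [0, 1]; zmin/zmax below pin the
# colour scale to that range so heatmaps are comparable across queries.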
def create_heatmap(similarity_matrix, model_names):
    """Create a Plotly heatmap from similarity matrix."""
    fig = go.Figure(data=go.Heatmap(
        z=similarity_matrix,
        x=model_names,
        y=model_names,
        colorscale='Viridis',
        zmin=0,
        zmax=1
    ))

    fig.update_layout(
        title='Model Response Similarity Matrix',
        xaxis_title='Models',
        yaxis_title='Models',
        height=500,
        width=500
    )

    return fig
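

# Shape of disagreement_result, inferred from the accesses below (keys are produced by
# disagreement_analyzer and are not guaranteed by this module): {"disagreements":
# [{"cluster1": [...], "cluster2": [...], "type": str, "explanation": str,
# "similarity_score": float, "disagreement_points": [...]}, ...], plus top-level
# "explanation", "topics", "confidence_analysis" and "disagreement_summary".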
def create_comparison_table(disagreement_result):
    """Create a comparison table showing model contradictions with enhanced details."""
    if not disagreement_result or "disagreements" not in disagreement_result:
        return []

    table_rows = []
    for disagreement in disagreement_result["disagreements"]:
        # Show only the short model names (the part after the provider prefix).
        cluster1_display = [model.split('/')[-1] for model in disagreement["cluster1"]]
        cluster2_display = [model.split('/')[-1] for model in disagreement["cluster2"]]

        similarity_score = disagreement.get("similarity_score", "N/A")
        if isinstance(similarity_score, float):
            similarity_display = f"{similarity_score:.3f}"
        else:
            similarity_display = str(similarity_score)

        disagreement_points = disagreement.get("disagreement_points", [])
        if disagreement_points:
            points_display = "; ".join(disagreement_points[:2])
        else:
            points_display = "No specific points identified"

        table_rows.append([
            ", ".join(cluster1_display),
            ", ".join(cluster2_display),
            disagreement["type"],
            f"Similarity: {similarity_display} | {disagreement['explanation']}",
            points_display
        ])

    return table_rows
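

# Markdown formatting helpers used when assembling the "Consensus Analysis" panel below.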
def _format_topics(topics: dict) -> str:
    """Format topic analysis for display."""
    if not topics:
        return "No specific topics identified."

    topic_lines = []
    for topic, models in topics.items():
        model_names = [model.split('/')[-1] for model in models]
        topic_lines.append(f"- **{topic.title()}:** {', '.join(model_names)}")

    return "\n".join(topic_lines)


def _format_confidence_analysis(confidence_analysis: dict) -> str:
    """Format confidence analysis for display."""
    if not confidence_analysis:
        return "No confidence analysis available."

    most_confident = confidence_analysis.get("most_confident_model", "Unknown")
    least_confident = confidence_analysis.get("least_confident_model", "Unknown")
    confidence_variance = confidence_analysis.get("confidence_variance", 0)

    most_confident_name = most_confident.split('/')[-1] if '/' in most_confident else most_confident
    least_confident_name = least_confident.split('/')[-1] if '/' in least_confident else least_confident

    return f"""
- **Most Confident Model:** {most_confident_name}
- **Least Confident Model:** {least_confident_name}
- **Confidence Variance:** {confidence_variance:.3f}
"""


def _format_disagreement_summary(disagreement_summary: dict) -> str:
    """Format disagreement summary for display."""
    if not disagreement_summary:
        return "No disagreement summary available."

    total_disagreements = disagreement_summary.get("total_disagreements", 0)
    disagreement_types = disagreement_summary.get("disagreement_types", {})
    avg_similarity = disagreement_summary.get("avg_similarity", 0)

    if total_disagreements == 0:
        return "No disagreements detected - models are in strong agreement."

    type_lines = []
    for d_type, count in disagreement_types.items():
        type_lines.append(f"- {count} {d_type}")

    return f"""
- **Total Disagreements:** {total_disagreements}
- **Average Similarity:** {avg_similarity:.3f}
- **Disagreement Types:**
{chr(10).join(type_lines)}
"""
def query_models(question: str, file_obj: Optional[gr.File]) -> tuple:
    """Process the query and return results."""
    try:
        nebius_api_key = os.environ.get("NEBIUS_API_KEY")
        if not nebius_api_key:
            return (
                [["Error", "API Key not set", "0%", "N/A"]],
                None,
                "## Error\nNEBIUS_API_KEY environment variable not set. Please ensure it's configured as a Modal Secret or in your local .env file.",
                [],
                "No consensus response available.",
                "Error: API Key Missing"
            )
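
        # Every question gets the same brevity/criticality instruction so the model
        # answers stay short and comparable for the similarity analysis.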
        instruction = "Answer under 100 words and be critical."
        question = question.strip()
        if question.endswith(('.', '?', '!')):
            question = f"{question} {instruction}"
        else:
            question = f"{question}. {instruction}"

        document_content = process_document(file_obj)
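
        # app.run() starts an ephemeral Modal app for this request so the Modal
        # functions imported from mcp_server (parallel_model_query, consensus_algorithm,
        # disagreement_analyzer, synthesize_consensus) can be invoked with .remote().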
        with app.run():
            try:
                result = parallel_model_query.remote(question, server.models, document_content)
                if not result or "responses" not in result:
                    raise Exception("Invalid response from model query")

                model_responses = []
                for model, response in result["responses"].items():
                    if isinstance(response, dict):
                        model_responses.append([
                            model,
                            response.get("response", "Error"),
                            f"{response.get('confidence', 0)*100:.1f}%" if response.get("status") == "success" else "N/A",
                            "N/A"
                        ])

                try:
                    consensus_result = consensus_algorithm.remote(result["responses"])
                    disagreement_result = disagreement_analyzer.remote(result["responses"], api_key=nebius_api_key)

                    # Reuse the overall disagreement explanation as each row's "Reasoning" entry.
                    for i, (model, _) in enumerate(result["responses"].items()):
                        if i < len(model_responses):
                            model_responses[i][3] = disagreement_result.get("explanation", "No explanation available")

                    similarity_matrix = consensus_result.get("similarity_matrix", [])
                    model_names = list(result["responses"].keys())
                    heatmap_fig = create_heatmap(similarity_matrix, model_names)

                    print(f"Disagreement Result: {disagreement_result}")
                    comparison_table = create_comparison_table(disagreement_result)

                    synthesized_response = synthesize_consensus.remote(result["responses"], disagreement_result.get("disagreements", []), api_key=nebius_api_key)

                except Exception as e:
                    print(f"Error in consensus/disagreement analysis: {str(e)}")
                    consensus_result = {}
                    disagreement_result = {}
                    heatmap_fig = None
                    comparison_table = []
                    synthesized_response = "Error synthesizing consensus response."

                consensus_explanation = f"""
## Consensus Analysis

**Overall Consensus Score:** {consensus_result.get('consensus_score', 0)*100:.1f}%

### Enhanced Disagreement Analysis
{disagreement_result.get('explanation', 'No explanation available')}

### Topic Analysis
{_format_topics(disagreement_result.get('topics', {}))}

### Confidence Analysis
{_format_confidence_analysis(disagreement_result.get('confidence_analysis', {}))}

### Disagreement Summary
{_format_disagreement_summary(disagreement_result.get('disagreement_summary', {}))}
"""

                return (
                    model_responses,
                    heatmap_fig,
                    consensus_explanation,
                    comparison_table,
                    synthesized_response,
                    "Processing complete!"
                )

            except Exception as e:
                return (
                    [["Error", f"Modal execution failed: {str(e)}", "0%", "N/A"]],
                    None,
                    "## Error\nAn error occurred while processing your request.",
                    [],
                    "Error synthesizing consensus response.",
                    f"Error: {str(e)}"
                )

    except Exception as e:
        return (
            [["Error", f"An error occurred: {str(e)}", "0%", "N/A"]],
            None,
            "## Error\nAn unexpected error occurred.",
            [],
            "Error synthesizing consensus response.",
            f"Error: {str(e)}"
        )
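

# Custom CSS injected into gr.Blocks below: Open Sans everywhere, the gradient title
# cards, the dark info box, table/button/accordion polish, and the sticky footer.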
CUSTOM_CSS = """

@import url('https://fonts.googleapis.com/css2?family=Open+Sans:wght@400;700&display=swap');

body, .gradio-container, .gradio-input, .gradio-dataframe, .gradio-dataframe table, .gradio-dataframe th, .gradio-dataframe td, .gr-button, .gr-markdown, .gradio-accordion, input, textarea, select, button {
    font-family: "Open Sans", sans-serif !important;
}

/* Enhanced title styling with white border */
.main-title {
    text-align: center;
    font-size: 4rem;
    font-weight: bold;
    color: #2c3e50;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    padding: 10px 0px;
    /* border: 4px solid white; */
    border-radius: 15px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    margin: 5px auto 0 auto;
    background-color: rgba(255, 255, 255, 0.9);
    backdrop-filter: blur(10px);
}

.main-title-2 {
    text-align: center;
    font-size: 1.5rem;
    font-weight: bold;
    color: black;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    padding: 20px 30px;
    /* border: 4px solid white; */
    border-radius: 15px;
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.1);
    margin: 20px 0;
    background-color: rgba(255, 255, 255, 0.9);
    backdrop-filter: blur(10px);
}

/* Info section styling */
.info-section {
    background-color: #171717;
    /* border: 3px solid #e2e8f0; */
    border-radius: 12px;
    padding: 25px;
    margin: 10px 0;
    margin-top: 5px !important;
    box-shadow: 0 4px 16px rgba(0, 0, 0, 0.1);
    line-height: 1.6;
    color: #ffffff;
    font-size: 1.1rem;
}

.info-section p {
    color: white;
    font-size: 1.1rem;
}

.info-section ul {
    margin-left: 20px;
}

.info-section li {
    margin-bottom: 8px;
    color: #ffffff;
}

/* Container improvements */
.gradio-container {
    border-radius: 10px;
    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
}

/* Button enhancements */
.gr-button {
    border-radius: 10px;
    transition: all 0.3s ease;
    font-weight: 600;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.gr-button:hover {
    background-color: var(--button-secondary-background-fill-hover);
    transform: translateY(-2px);
    box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}

/* Input field improvements */
.gradio-input {
    border-radius: 10px;
    padding: 12px 16px;
    border: 2px solid #e2e8f0;
    transition: border-color 0.3s ease;
}

.gradio-input:focus {
    border-color: #667eea;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
}

/* Dataframe styling */
.gradio-dataframe {
    border-radius: 12px;
    overflow: hidden;
    box-shadow: 0 4px 16px rgba(0,0,0,0.1);
}

.gradio-dataframe table {
    border-collapse: separate;
    border-spacing: 0;
}

.gradio-dataframe th, .gradio-dataframe td {
    padding: 15px 18px;
    border-bottom: 1px solid var(--border-color-primary);
}

.gradio-dataframe th {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    font-weight: bold;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}

.gradio-dataframe tr:nth-child(even) {
    background-color: rgba(102, 126, 234, 0.05);
}

.gradio-dataframe tr:last-child td {
    border-bottom: none;
}

/* Markdown improvements */
.gr-markdown {
    padding: 20px;
    border-radius: 12px;
    background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
    border: 1px solid #cbd5e0;
    box-shadow: 0 2px 8px rgba(0,0,0,0.05);
}

/* Accordion styling */
.gradio-accordion {
    margin-bottom: 15px;
    border-radius: 10px;
    overflow: hidden;
    box-shadow: 0 2px 8px rgba(0,0,0,0.1);
}

/* Status indicator */
.status-indicator {
    background: linear-gradient(45deg, #4CAF50, #45a049);
    color: white;
    padding: 10px 20px;
    border-radius: 25px;
    font-weight: bold;
    text-align: center;
    box-shadow: 0 2px 10px rgba(76, 175, 80, 0.3);
}

#footer {
    position: sticky;
    bottom: 0;
    width: 100%;
    background-color: #111;
    color: #ccc;
    text-align: center;
    font-size: 0.85rem;
    padding: 10px 20px;
    z-index: 9999;
    box-shadow: 0 -2px 10px rgba(0, 0, 0, 0.3);
}

/* Remove default top margin between Gradio blocks */
div.svelte-1ipelgc {
    margin-top: 0px !important;
}

/* Remove extra vertical space between top-level blocks */
.gradio-container > div {
    margin-top: 0px !important;
    padding-top: 0px !important;
}
"""
with gr.Blocks(
    title="Multi-Model Consensus Builder",
    theme=gr.themes.Monochrome(),
    analytics_enabled=False,
    css=CUSTOM_CSS
) as demo:

    gr.HTML("""
    <div class="main-title">
        🤖 SUCH.AI
    </div>
    """)

    gr.HTML("""
    <div class="info-section">
        <p><strong>What is this?</strong> Such.AI queries multiple top-tier AI models (Llama, Gemma, Qwen, and more) at once, then analyzes and visualizes their agreement, confidence, and disagreements. Submit a claim for multi-model fact-checking, disagreement detection, and risk estimation: it synthesizes a concise, critical consensus answer and highlights where the models diverge, ignoring trivial formatting differences. You can also upload documents (PDF, DOCX, TXT) for context.</p>

        <p><strong>Why use it?</strong></p>
        <ul>
            <li>See where the smartest AIs agree (and where they don't)</li>
            <li>Get a critical, concise consensus answer</li>
            <li>Visualize model agreement with heatmaps and tables</li>
            <li>Upload documents for context</li>
            <li>Perfect for research, fact-checking, and critical thinking</li>
        </ul>
    </div>
    """)

    with gr.Row():
        with gr.Column():
            question = gr.Textbox(
                label="Your Question",
                placeholder="Ask anything...",
                lines=3
            )
            file_input = gr.File(
                label="Upload Document (optional)",
                file_types=[".pdf", ".txt", ".docx"]
            )
            submit_btn = gr.Button("Get Consensus Analysis", variant="primary")

        with gr.Column():
            status = gr.HTML(label="Live Processing Status")

    with gr.Row():
        responses_df = gr.Dataframe(
            label="Model Responses",
            headers=["Model", "Response", "Confidence", "Reasoning"],
            wrap=True
        )

    with gr.Row():
        consensus_plot = gr.Plot(label="Consensus Visualization")

    with gr.Accordion("Consensus Analysis", open=True):
        consensus_explanation = gr.Markdown(label="Final Consensus & Explanation")

    with gr.Row():
        with gr.Accordion("Model Contradictions Analysis", open=False):
            comparison_table = gr.Dataframe(
                headers=["Models Group 1", "Models Group 2", "Type of Disagreement", "Explanation", "Disagreement Points"],
                wrap=True
            )

    with gr.Row():
        with gr.Accordion("Synthesized Consensus Response", open=False):
            synthesized_output = gr.Markdown(label="Synthesized Consensus Response")

    with gr.Row():
        gr.Markdown(
            """
            <div style='width: 100%; text-align: center; padding: 20px; font-size: 1.1rem; color: #ccc;'>
            Built with 💜 by <strong>Team Such.AI</strong><br>
            for the <em>UNESCO Youth Hackathon 2025</em>
            </div>
            """,
            elem_id="footer"
        )
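
    # Wire the submit button to query_models; the outputs list maps one-to-one onto the
    # six values returned by the callback.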
    submit_btn.click(
        fn=query_models,
        inputs=[question, file_input],
        outputs=[responses_df, consensus_plot, consensus_explanation, comparison_table, synthesized_output, status],
        show_progress=True
    )
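

# Local entry point. demo.queue() enables request queueing so long-running Modal calls
# don't block the UI; share=True asks Gradio for a temporary public link, and the app
# listens on all interfaces at port 7860.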
if __name__ == "__main__":
    demo.queue()
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        favicon_path=None
    )