Dgaze / app.py
lightmate's picture
Update app.py
9b50d9f verified
"""
DGaze - Hugging Face Spaces Optimized Version
Simplified for Spaces deployment with proper HTTPS handling.
"""
import os
import gradio as gr
import time
import uuid
import asyncio
import aiohttp
import json
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
# Import our modular components
try:
    # Project-local modules. They are imported inside a try block so that a
    # failure is reported with a hint before the original exception is re-raised.
    from config.settings import settings
    from api.client import api_client
    from components.verification_result import format_verification_results
except Exception as e:
    # Catches every Exception, not only ImportError.
    # NOTE(review): presumably because importing the settings module can fail
    # on missing environment variables (see the hint below) - confirm.
    print(f"ERROR: Failed to import required modules: {e}")
    print("Please ensure all environment variables are set correctly.")
    raise
# DeBERTa API configuration, read from the environment.
# NOTE(review): the fallback API key below is a credential hard-coded in source.
# It is kept so existing deployments keep working, but DEBERTA_API_KEY should be
# supplied through the environment (e.g. a Space secret) instead.
DEBERTA_API_URL = os.getenv('DEBERTA_API_URL', 'https://rahulkc-dev--deberta-fever-vllm-pattern-serve.modal.run')
DEBERTA_API_KEY = os.getenv('DEBERTA_API_KEY', 'deberta-fever-secret-key')
print(f"DEBUG: DeBERTa API URL: {DEBERTA_API_URL}")
# Report only whether a key is configured; never echo any part of the secret
# into the logs (a key of four characters or fewer would be printed in full).
print(f"DEBUG: DeBERTa API Key: {'***' if DEBERTA_API_KEY else 'Not set'}")
# Simplified session management for HuggingFace Spaces (no FastAPI)
import uuid  # NOTE: repeats the top-of-file import; harmless, kept as-is
# Module-level dict. It is not referenced anywhere else in the visible part of
# this file - NOTE(review): possibly a leftover; confirm before removing.
session_data = {}
# Simplified session management for HF Spaces (no authentication)
def get_session_id():
    """Return the trial-tracking session ID, creating it on the first call.

    The ID has the form ``hf_trial_`` followed by 12 hex characters and is
    cached as an attribute on this function object, so every later call in
    the same process returns the same value.

    NOTE(review): because the cache lives on the function itself, all
    visitors served by this process appear to share one ID (and therefore one
    trial counter) - confirm this is intended.
    """
    try:
        return get_session_id.session_id
    except AttributeError:
        # First call: mint the ID and remember it on the function object.
        token = uuid.uuid4().hex[:12]
        get_session_id.session_id = f"hf_trial_{token}"
        return get_session_id.session_id
# Simplified trial management for HF Spaces
# Maps a session ID (see get_session_id) to the number of verifications that
# session has used. Plain in-memory dict, so counts are lost on restart.
trial_tracker = {}
# Trial limit configuration (can be changed easily)
# Maximum number of verifications allowed per tracked session ID.
TRIAL_LIMIT = 5 # Increased from 2 to 5 for better demo experience
def clear_trial_cache():
    """Forget every recorded trial count (handy for demos and tests).

    Empties the module-level ``trial_tracker`` dict in place and prints a
    confirmation line.
    """
    # The dict is mutated in place and the name is never rebound, so no
    # ``global`` declaration is needed.
    trial_tracker.clear()
    print("βœ… Trial cache cleared - all users can start fresh!")
def get_trial_count() -> int:
    """Return how many verifications the current session ID has used (0 if none)."""
    return trial_tracker.get(get_session_id(), 0)
def increment_trial_count() -> int:
    """Add one to the current session ID's usage counter and return the new total."""
    sid = get_session_id()
    updated = trial_tracker.get(sid, 0) + 1
    trial_tracker[sid] = updated
    return updated
# Clear cache on startup to ensure fresh start
# (trial_tracker is created empty a few lines above, so at import time this
# call mainly emits the confirmation message.)
clear_trial_cache()
# DeBERTa API Functions
async def call_deberta_api_async(evidence: str, claim: str, session_timeout: int = 30) -> dict:
    """POST one evidence/claim pair to the DeBERTa FEVER endpoint and parse the reply.

    Args:
        evidence: Text the claim is checked against.
        claim: Statement to verify.
        session_timeout: Total timeout, in seconds, given to the HTTP session.

    Returns:
        On HTTP 200: a dict with ``success=True`` plus ``verdict``
        (``SUPPORTED`` / ``CONTRADICTED`` / ``NEUTRAL`` / ``UNKNOWN``),
        ``confidence``, ``truthfulness_score``, ``raw_response``, ``claim``
        and ``evidence``.
        Otherwise: a dict with ``success=False``, ``error``, ``claim`` and
        ``evidence``. Exceptions are never propagated; they are reported
        through the failure dict.
    """

    def _failure(reason: str) -> dict:
        # Shared shape for every unsuccessful outcome.
        return {
            "success": False,
            "error": reason,
            "claim": claim,
            "evidence": evidence
        }

    # Markers are tested in this order; the first one found in the reply wins.
    verdict_markers = (
        ("βœ… SUPPORTED", "SUPPORTED"),
        ("❌ CONTRADICTED", "CONTRADICTED"),
        ("πŸ€” NEUTRAL", "NEUTRAL"),
    )

    try:
        request_headers = {
            "Authorization": f"Bearer {DEBERTA_API_KEY}",
            "Content-Type": "application/json"
        }
        request_body = {
            "messages": [
                {
                    "role": "user",
                    "content": f"Evidence: {evidence} Claim: {claim}"
                }
            ],
            "model": "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli"
        }
        client_timeout = aiohttp.ClientTimeout(total=session_timeout)
        async with aiohttp.ClientSession(timeout=client_timeout) as http:
            async with http.post(
                f"{DEBERTA_API_URL}/v1/chat/completions",
                headers=request_headers,
                json=request_body
            ) as reply:
                if reply.status != 200:
                    return _failure(f"API returned status {reply.status}")

                body = await reply.json()
                text = body["choices"][0]["message"]["content"]

                verdict = next(
                    (label for marker, label in verdict_markers if marker in text),
                    "UNKNOWN",
                )

                # The first percentage found in the reply is taken as the confidence.
                percent = re.search(r'(\d+\.?\d*)%', text)
                confidence = float(percent.group(1)) if percent else 0.0

                # Support maps confidence straight to truthfulness, contradiction
                # inverts it, and anything else sits at the midpoint.
                if verdict == "SUPPORTED":
                    truthfulness_score = confidence
                elif verdict == "CONTRADICTED":
                    truthfulness_score = 100 - confidence
                else:
                    truthfulness_score = 50

                return {
                    "success": True,
                    "verdict": verdict,
                    "confidence": confidence,
                    "truthfulness_score": truthfulness_score,
                    "raw_response": text,
                    "claim": claim,
                    "evidence": evidence
                }
    except Exception as e:
        return _failure(str(e))
def call_deberta_api_sync(evidence: str, claim: str) -> dict:
    """Run ``call_deberta_api_async`` to completion from synchronous code.

    Args:
        evidence: Text the claim is checked against.
        claim: Statement to verify.

    Returns:
        The result dict produced by ``call_deberta_api_async``.

    Raises:
        RuntimeError: If called from a thread that already has a running
            event loop.
    """
    # asyncio.run creates a fresh loop, runs the coroutine, shuts down async
    # generators, closes the loop and un-sets it as the thread's current loop.
    # The previous hand-rolled version left a *closed* loop installed via
    # set_event_loop() for whatever ran next on the same thread.
    return asyncio.run(call_deberta_api_async(evidence, claim))
def _join_source_snippets(sources, limit: int = 3) -> str:
    """Join the ``snippet`` fields of the first ``limit`` sources with spaces.

    A missing or ``None`` snippet contributes an empty string instead of
    making the join fail.
    """
    return " ".join((source.get('snippet') or '') for source in sources[:limit])


def extract_claims_and_evidence(verification_data: dict) -> list:
    """Build claim/evidence pairs from a verification result.

    Three shapes are tried, in this order:

    1. Top-level ``claims``: every claim is paired with the ``evidence`` value
       (falling back to ``sources``, then to an empty string).
    2. ``pipeline_results``: claims come from the second entry, evidence from
       the snippets of the first three ``sources`` of the first entry.
    3. If neither produced a pair and ``input_text`` is present: the first 500
       characters of the input become the single claim, with evidence taken
       from the top-level ``sources`` snippets, or a generic placeholder when
       those yield no text.

    Args:
        verification_data: Result dict; its schema is not validated here.

    Returns:
        A list of ``{'claim': ..., 'evidence': ...}`` dicts. Extraction is
        best-effort: on any error the problem is printed and the pairs
        collected so far (possibly none) are returned.
    """
    placeholder_evidence = "General knowledge and web sources"
    claims_evidence_pairs = []
    try:
        if 'claims' in verification_data:
            evidence_text = verification_data.get('evidence', verification_data.get('sources', ''))
            claims_evidence_pairs.extend(
                {'claim': claim, 'evidence': evidence_text}
                for claim in verification_data['claims']
            )
        # Fallback: try to extract from other possible structures
        elif 'pipeline_results' in verification_data:
            pipeline_data = verification_data['pipeline_results']
            # Claims live in step 2; len > 1 also guarantees step 1 exists.
            if len(pipeline_data) > 1 and 'claims' in pipeline_data[1]:
                evidence_text = ""
                if 'sources' in pipeline_data[0]:
                    evidence_text = _join_source_snippets(pipeline_data[0]['sources'])
                claims_evidence_pairs.extend(
                    {'claim': claim, 'evidence': evidence_text}
                    for claim in pipeline_data[1]['claims']
                )

        # If no structured claims were found, treat the raw input as one claim.
        if not claims_evidence_pairs and 'input_text' in verification_data:
            evidence_text = ""
            if 'sources' in verification_data:
                evidence_text = _join_source_snippets(verification_data['sources'])
            # Keep the placeholder when the sources contribute no text, rather
            # than sending an empty evidence string downstream.
            if not evidence_text.strip():
                evidence_text = placeholder_evidence
            claims_evidence_pairs.append({
                'claim': verification_data['input_text'][:500],  # Limit claim length
                'evidence': evidence_text
            })
    except Exception as e:
        # Deliberately broad: the input schema is unknown, and a malformed
        # payload must not abort the surrounding verification.
        print(f"ERROR extracting claims and evidence: {e}")
    return claims_evidence_pairs
def process_deberta_results_parallel(claims_evidence_pairs: list, max_workers: int = 3) -> list:
    """Run the DeBERTa check for several claim/evidence pairs on a thread pool.

    Only the first five pairs are processed. Results are returned in
    completion order, not input order.

    Args:
        claims_evidence_pairs: Dicts with ``'claim'`` and ``'evidence'`` keys.
        max_workers: Upper bound on concurrent requests (values below 1 are
            treated as 1).

    Returns:
        One result dict per processed pair. A pair whose call raised, or that
        had not finished within the 45 second collection window, is reported
        as ``{"success": False, "error": ..., "claim": ..., "evidence": ...}``
        instead of discarding the results that did arrive.
    """
    # Function-scope import: on Python < 3.11 this is a different class from
    # the builtin TimeoutError.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    if not claims_evidence_pairs:
        return []

    pairs = claims_evidence_pairs[:5]  # Limit to 5 claims max
    # Limit the number of parallel requests to avoid overwhelming the API.
    worker_count = max(1, min(max_workers, len(pairs)))
    deberta_results = []

    def _failure(pair: dict, reason: str) -> dict:
        return {
            "success": False,
            "error": reason,
            "claim": pair['claim'],
            "evidence": pair['evidence']
        }

    with ThreadPoolExecutor(max_workers=worker_count) as executor:
        future_to_pair = {
            executor.submit(call_deberta_api_sync, pair['evidence'], pair['claim']): pair
            for pair in pairs
        }
        handled = set()

        def _collect(future) -> None:
            # Record a finished future's result, or its exception as a failure.
            handled.add(future)
            try:
                deberta_results.append(future.result())
            except Exception as e:
                deberta_results.append(_failure(future_to_pair[future], str(e)))

        try:
            for future in as_completed(future_to_pair, timeout=45):
                _collect(future)
        except FuturesTimeoutError:
            # as_completed raises once the window elapses. Keep what was
            # gathered and account for every remaining pair explicitly.
            for future, pair in future_to_pair.items():
                if future in handled:
                    continue
                # cancel() succeeds only for calls that have not started yet.
                if future.cancel() or not future.done():
                    deberta_results.append(
                        _failure(pair, "Timed out waiting for DeBERTa result")
                    )
                else:
                    _collect(future)
            # Calls already running cannot be interrupted; leaving the
            # ``with`` block still waits for them to finish on their own.
    return deberta_results
def format_deberta_results(deberta_results: list) -> str:
    """Render DeBERTa fact-check results as an HTML fragment.

    Successful results get an overall banner (majority verdict, average
    confidence and truthfulness, per-verdict counts) followed by one card per
    claim. Failed results are summarised in a single warning box.

    Args:
        deberta_results: Result dicts as produced by the DeBERTa API helpers.
            Entries with a truthy ``'success'`` must carry ``'verdict'``,
            ``'confidence'`` and ``'claim'``.

    Returns:
        The HTML string, or ``""`` when ``deberta_results`` is empty.
    """
    # Function-scope import; only ``escape`` is needed.
    from html import escape

    if not deberta_results:
        return ""

    fragments = ["""
<div style="background: #f8f9fa; border: 1px solid #e9ecef; border-radius: 8px; padding: 1.5rem; margin: 1rem 0;">
<h3 style="margin: 0 0 1rem 0; color: #495057; font-size: 1.2rem; display: flex; align-items: center;">
🧠 AI Fact-Checking Analysis (DeBERTa FEVER)
</h3>
"""]

    successful_results = [r for r in deberta_results if r.get('success', False)]
    failed_results = [r for r in deberta_results if not r.get('success', False)]

    if successful_results:
        verdicts = [r['verdict'] for r in successful_results]
        confidences = [r['confidence'] for r in successful_results]
        truthfulness_scores = [r.get('truthfulness_score', 50) for r in successful_results]
        avg_confidence = sum(confidences) / len(confidences)
        avg_truthfulness = sum(truthfulness_scores) / len(truthfulness_scores)

        supported_count = verdicts.count('SUPPORTED')
        contradicted_count = verdicts.count('CONTRADICTED')
        neutral_count = verdicts.count('NEUTRAL')

        # Overall assessment: simple majority between supported and contradicted.
        if supported_count > contradicted_count:
            overall_verdict, verdict_color, verdict_icon = "MOSTLY SUPPORTED", "#28a745", "βœ…"
        elif contradicted_count > supported_count:
            overall_verdict, verdict_color, verdict_icon = "MOSTLY CONTRADICTED", "#dc3545", "❌"
        else:
            overall_verdict, verdict_color, verdict_icon = "MIXED/NEUTRAL", "#ffc107", "πŸ€”"

        fragments.append(f"""
<div style="background: {verdict_color}; color: white; padding: 1rem; border-radius: 6px; margin-bottom: 1rem; text-align: center;">
<div style="font-size: 1.5rem; font-weight: bold; margin-bottom: 0.5rem;">
{verdict_icon} {overall_verdict}
</div>
<div style="font-size: 1rem; opacity: 0.9;">
Average Confidence: {avg_confidence:.1f}% | Truthfulness: {avg_truthfulness:.1f}% | Claims Analyzed: {len(successful_results)}
</div>
<div style="font-size: 0.9rem; opacity: 0.8; margin-top: 0.5rem;">
βœ… {supported_count} Supported β€’ ❌ {contradicted_count} Contradicted β€’ πŸ€” {neutral_count} Neutral
</div>
</div>
""")

        # Individual results
        fragments.append("<div style='margin-top: 1rem;'><h4 style='margin-bottom: 0.5rem; color: #495057;'>Individual Claim Analysis:</h4>")
        for i, result in enumerate(successful_results, 1):
            verdict = result['verdict']
            confidence = result['confidence']
            truthfulness_score = result.get('truthfulness_score', 50)

            # Claim text originates from user input: truncate the raw text
            # first, then escape it so it cannot inject markup.
            claim_text = str(result['claim'])
            if len(claim_text) > 150:
                claim_text = claim_text[:150] + "..."
            claim = escape(claim_text)

            # Choose colors and icons
            if verdict == "SUPPORTED":
                color, border_color, icon = "#d4edda", "#c3e6cb", "βœ…"
            elif verdict == "CONTRADICTED":
                color, border_color, icon = "#f8d7da", "#f5c6cb", "❌"
            else:
                color, border_color, icon = "#fff3cd", "#ffeaa7", "πŸ€”"

            fragments.append(f"""
<div style="background: {color}; border: 1px solid {border_color}; border-radius: 4px; padding: 0.75rem; margin: 0.5rem 0;">
<div style="font-weight: 600; margin-bottom: 0.25rem; color: #495057;">
{icon} Claim {i}: {verdict} ({confidence:.1f}% confidence | {truthfulness_score:.1f}% truthfulness)
</div>
<div style="font-size: 0.9rem; color: #6c757d; font-style: italic;">
"{claim}"
</div>
</div>
""")
        fragments.append("</div>")

    if failed_results:
        fragments.append(f"""
<div style="background: #f8d7da; border: 1px solid #f5c6cb; border-radius: 4px; padding: 0.75rem; margin: 1rem 0;">
<div style="font-weight: 600; color: #721c24; margin-bottom: 0.25rem;">
⚠️ Some claims could not be analyzed ({len(failed_results)} failed)
</div>
<div style="font-size: 0.9rem; color: #721c24;">
DeBERTa API may be temporarily unavailable for some requests.
</div>
</div>
""")

    fragments.append("</div>")
    return "".join(fragments)
# No authentication routes for HF Spaces - simplified version
# Simplified verification function for HF Spaces
def verify_news_simple(input_text: str, progress=gr.Progress()) -> str:
    """Verify a piece of news text and return the result as HTML.

    Flow: validate input, enforce the trial limit, show a sequence of timed
    progress steps, call the verification API, run the DeBERTa check on the
    extracted claims, then combine trial info, main result and DeBERTa result.

    Args:
        input_text: News text or URL entered by the user.
        progress: Gradio progress tracker (supplied by Gradio).

    Returns:
        HTML for the output component; a plain message for empty input; an
        ``Error: ...`` message if anything inside the verification raises.
    """
    # Function-scope import; only ``escape`` is needed.
    from html import escape

    # ``not input_text`` also covers None, which has no .strip().
    if not input_text or not input_text.strip():
        return "Please enter some text or URL to verify"

    # Initialize progress tracking with better timing
    progress(0, desc="πŸ” Initializing verification...")
    time.sleep(0.8)
    try:
        # Check trial limits for free users
        if get_trial_count() >= TRIAL_LIMIT:
            return f"""
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 2rem; border-radius: 12px; color: white; text-align: center;
box-shadow: 0 10px 25px rgba(0,0,0,0.1);">
<h2 style="margin: 0 0 1rem 0; font-size: 1.5rem; color: white !important;">πŸ” Trial Limit Reached</h2>
<p style="margin: 0 0 1.5rem 0; opacity: 0.9; color: white !important;">
You've used your {TRIAL_LIMIT} free verifications! This is a demo version on HuggingFace Spaces.
</p>
<p style="margin: 0; font-size: 0.9rem; opacity: 0.8; color: white !important;">
For unlimited access, deploy your own instance.
</p>
</div>
"""
        # The trial is counted before the API call, so a failed verification
        # still uses one up.
        remaining = TRIAL_LIMIT - increment_trial_count()

        # NOTE: these steps only update the progress bar and sleep; the real
        # verification call starts after the last one.
        simulated_steps = (
            (0.05, "πŸ“ Processing input text...", 2),
            (0.15, "πŸ” Extracting claims...", 2),
            (0.25, "🌐 Searching for evidence...", 8),
            (0.45, "πŸ“š Gathering relevant sources...", 4),
            (0.65, "πŸ”¬ Analyzing evidence quality...", 3),
        )
        for fraction, label, pause_seconds in simulated_steps:
            progress(fraction, desc=label)
            time.sleep(pause_seconds)

        progress(0.75, desc="🧠 Processing with AI...")
        data = api_client.verify_news(input_text)
        # Add input text to data for DeBERTa processing
        data['input_text'] = input_text

        progress(0.85, desc="πŸ€– Running DeBERTa fact-checking...")
        claims_evidence_pairs = extract_claims_and_evidence(data)
        deberta_results = []
        if claims_evidence_pairs and DEBERTA_API_URL:
            try:
                deberta_results = process_deberta_results_parallel(claims_evidence_pairs)
            except Exception as e:
                # DeBERTa is an optional extra: continue without its results.
                print(f"DeBERTa processing error: {e}")

        progress(0.95, desc="πŸ“Š Formatting results...")
        time.sleep(1)  # Rest processing
        progress(1.0, desc="βœ… Verification complete!")

        main_result = format_verification_results(data)
        deberta_html = format_deberta_results(deberta_results) if deberta_results else ""
        trial_info = f"""
<div style="background: #e3f2fd; border-left: 4px solid #2196f3; padding: 1rem; margin: 1rem 0; border-radius: 4px;">
<p style="margin: 0; color: #1565c0; font-size: 0.9rem;">
⚑ Free Trial: <strong>{remaining} verification{'s' if remaining != 1 else ''} remaining</strong>
</p>
</div>
"""
        return trial_info + main_result + deberta_html
    except Exception as e:
        print(f"ERROR: {e}")
        progress(1.0, desc="❌ Error occurred")
        # The return value is rendered as HTML, so the exception text is
        # escaped to display literally.
        return f"Error: {escape(str(e))}"
# Simplified for HF Spaces - no authentication UI needed
# Create the simplified Gradio interface for HF Spaces
def create_interface():
    """Create a simplified Gradio interface for HuggingFace Spaces.

    Builds a ``gr.Blocks`` app with custom CSS, a header banner, a text input
    with a submit button, an HTML output area and two example inputs. Both the
    button click and pressing Enter in the textbox call ``verify_news_simple``.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet queued or launched).
    """
    custom_css = """
/* Clean white theme */
html, body {
background-color: white !important;
margin: 0 !important;
padding: 0 !important;
}
.gradio-container {
background-color: white !important;
max-width: 1200px !important;
margin: 0 auto !important;
padding: 20px !important;
}
/* Trial banner styling */
.trial-banner {
background: linear-gradient(135deg, #9775fa 0%, #667eea 100%) !important;
color: white !important;
padding: 1.5rem !important;
border-radius: 12px !important;
margin-bottom: 2rem !important;
text-align: center !important;
box-shadow: 0 4px 15px rgba(151, 117, 250, 0.2) !important;
}
/* Button styling - match header section color (#9775fa) */
button[variant="primary"],
.gr-button-primary,
.gr-button[data-variant="primary"],
input[type="submit"] {
background: #9775fa !important;
background-color: #9775fa !important;
color: white !important;
border: none !important;
border-radius: 8px !important;
font-weight: 600 !important;
transition: all 0.3s ease !important;
}
button[variant="primary"]:hover,
.gr-button-primary:hover,
.gr-button[data-variant="primary"]:hover,
input[type="submit"]:hover {
background: #8b5ff8 !important;
background-color: #8b5ff8 !important;
transform: translateY(-1px) !important;
box-shadow: 0 4px 12px rgba(151, 117, 250, 0.3) !important;
}
/* Progress bar styling - targeting all possible Gradio progress elements */
.progress,
.gr-progress,
[data-testid="progress-bar"],
.progress-container {
background-color: #f0f0f0 !important;
border-radius: 10px !important;
overflow: hidden !important;
height: 16px !important;
margin: 15px 0 !important;
border: 2px solid #e0e0e0 !important;
box-shadow: inset 0 1px 3px rgba(0,0,0,0.1) !important;
}
.progress-bar,
.gr-progress-bar,
[data-testid="progress-bar"] > div,
.progress-container > div {
background-color: #748ffc !important;
background: linear-gradient(90deg, #748ffc 0%, #8b5ff8 100%) !important;
height: 100% !important;
border-radius: 8px !important;
transition: width 0.4s ease-in-out !important;
position: relative !important;
overflow: hidden !important;
}
/* Animated stripes for progress bar */
.progress-bar::before,
.gr-progress-bar::before,
[data-testid="progress-bar"] > div::before {
content: '' !important;
position: absolute !important;
top: 0 !important;
left: 0 !important;
right: 0 !important;
bottom: 0 !important;
background-image: linear-gradient(45deg,
rgba(255,255,255,0.2) 25%,
transparent 25%,
transparent 50%,
rgba(255,255,255,0.2) 50%,
rgba(255,255,255,0.2) 75%,
transparent 75%,
transparent) !important;
background-size: 20px 20px !important;
animation: progress-stripes 1s linear infinite !important;
}
.progress-text,
.gr-progress-text,
[data-testid="progress-text"] {
color: #748ffc !important;
font-weight: 600 !important;
font-size: 0.95rem !important;
margin: 8px 0 !important;
text-align: center !important;
padding: 4px 0 !important;
}
@keyframes progress-stripes {
0% { background-position: 0 0; }
100% { background-position: 20px 0; }
}
/* Progress container styling */
.progress-container-wrapper,
.gr-form .progress-wrap,
div:has([data-testid="progress-bar"]) {
background-color: #fafafa !important;
border: 2px solid #748ffc !important;
border-radius: 12px !important;
padding: 20px !important;
margin: 15px 0 !important;
box-shadow: 0 2px 8px rgba(116, 143, 252, 0.1) !important;
}
/* Additional Gradio-specific progress selectors */
.svelte-1b19cri,
.gradio-progress,
.progress-level-bar {
background-color: #748ffc !important;
background: linear-gradient(90deg, #748ffc 0%, #8b5ff8 100%) !important;
}
/* Force progress bar visibility */
div[data-testid] div[style*="width"] {
background-color: #748ffc !important;
background: linear-gradient(90deg, #748ffc 0%, #8b5ff8 100%) !important;
min-height: 16px !important;
}
/* Gradio queue progress styling */
.gradio-container .progress {
background-color: #f0f0f0 !important;
border-radius: 10px !important;
height: 16px !important;
overflow: hidden !important;
border: 2px solid #e0e0e0 !important;
}
.gradio-container .progress div {
background: linear-gradient(90deg, #748ffc 0%, #8b5ff8 100%) !important;
height: 100% !important;
border-radius: 8px !important;
transition: width 0.4s ease !important;
}
"""
    with gr.Blocks(
        title="DGaze - News Verification",
        css=custom_css
    ) as demo:
        # Header banner. The advertised number of free searches is taken from
        # TRIAL_LIMIT so the banner cannot drift from the enforced limit.
        gr.HTML(value=f"""
<div class="trial-banner">
<h1 style="margin: 0 0 0.5rem 0; font-size: 3rem; font-weight: 800; color: white !important;">πŸ” DGaze</h1>
<h2 style="margin: 0 0 1rem 0; font-size: 1.4rem; font-weight: 600; color: white !important;">Advanced News Verification System</h2>
<div style="background: rgba(255,255,255,0.15); border-radius: 8px; padding: 1rem; margin: 1rem 0;">
<h3 style="margin: 0 0 0.5rem 0; font-size: 1.2rem; font-weight: bold; color: white !important;">πŸš€ Free Demo Version</h3>
<p style="margin: 0; opacity: 0.9; font-size: 1rem; color: white !important;">Try our news verification system with {TRIAL_LIMIT} free searches per session!</p>
<p style="margin: 0.5rem 0 0 0; font-size: 0.9rem; opacity: 0.8; color: white !important;">Powered by DeBERTa AI + Evidence Search</p>
</div>
</div>
""")
        # Main interface
        with gr.Column():
            input_text = gr.Textbox(
                label="Enter news text or URL to verify",
                placeholder="Paste news text to verify...",
                lines=5,
                max_lines=10
            )
            submit_btn = gr.Button("πŸ” Verify News", variant="primary")
        with gr.Column():
            output_html = gr.HTML(label="Verification Results", visible=True)
        # Handle submission: button click and Enter in the textbox share one handler.
        submit_btn.click(fn=verify_news_simple, inputs=input_text, outputs=output_html)
        input_text.submit(fn=verify_news_simple, inputs=input_text, outputs=output_html)
        # Examples
        gr.Examples(
            examples=[
                ["""BREAKING: Revolutionary AI breakthrough! πŸš€ Scientists at MIT have developed a new quantum AI system that can predict earthquakes with 99.7% accuracy up to 6 months in advance. The system, called "QuakeNet AI", uses quantum computing combined with machine learning to analyze seismic patterns invisible to current technology. Dr. Sarah Chen, lead researcher, claims this could save millions of lives and prevent billions in damages. The technology will be commercially available by 2026 according to insider sources. This comes just weeks after similar breakthroughs in cancer detection AI. What do you think about this amazing discovery? #AI #Earthquake #MIT #Science"""],
                ["""SpaceX conducted another major test for their Starship program yesterday, with Elon Musk claiming on social media that Flight 10 is "ready to revolutionize space travel forever." The company fired up all 33 Raptor engines on their Super Heavy booster at the Starbase facility in Texas. According to various reports and this detailed article (https://www.space.com/space-exploration/launches-spacecraft/spacex-fires-up-super-heavy-booster-ahead-of-starships-10th-test-flight-video), the test was part of preparations for the upcoming 10th test flight. However, some critics argue that SpaceX is moving too fast without proper safety protocols, especially after Flight 9 experienced issues. The FAA is still investigating the previous mission where both the booster and ship were lost. Industry experts remain divided on whether this aggressive testing schedule is beneficial or dangerous for the future of commercial spaceflight. πŸš€ #SpaceX #Starship #Space"""]
            ],
            inputs=[input_text]
        )
    return demo
# Build the Gradio interface at import time and expose it as module-level `demo`
demo = create_interface()
# Enable queueing for progress bars to work properly
demo.queue()

# Export for HuggingFace Spaces to pick up
if __name__ == "__main__":
    # Launch the Gradio demo directly for HF Spaces.
    # The port comes from the PORT environment variable, defaulting to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    demo.launch(
        server_name="0.0.0.0",
        server_port=listen_port,
        share=False,
        show_error=True,
        quiet=False,
    )