import gradio as gr
import google.generativeai as genai
from ddgs import DDGS
import requests
from bs4 import BeautifulSoup
import time
import re
from typing import List, Dict, Any
from datetime import datetime
import os
import tempfile
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT
# Application Constants
APP_NAME = "DeepResearchAgent-AI"
APP_VERSION = "v2.0"
APP_DESCRIPTION = "Advanced AI-Powered Research Assistant"
# Enhanced topic detection and search helper functions
def detect_topic_category(query: str) -> str:
    """Detect the category of research topic for specialized search strategies"""
    politics_keywords = ['politics', 'political', 'government', 'policy', 'election', 'democracy', 'parliament', 'congress', 'senate', 'president', 'minister', 'geopolitics', 'diplomacy', 'foreign policy', 'international relations']
    history_keywords = ['history', 'historical', 'ancient', 'medieval', 'world war', 'civilization', 'empire', 'dynasty', 'revolution', 'century', 'era', 'timeline', 'past', 'heritage']
    geography_keywords = ['geography', 'geographical', 'country', 'continent', 'ocean', 'mountain', 'river', 'climate', 'population', 'capital', 'border', 'region', 'territory', 'map']
    current_affairs_keywords = ['current', 'news', 'today', 'recent', 'latest', 'breaking', 'update', 'happening', '2024', '2025', 'this year', 'now']
    technology_keywords = ['technology', 'tech', 'ai', 'artificial intelligence', 'machine learning', 'software', 'hardware', 'computer', 'digital', 'programming', 'coding', 'algorithm', 'data science', 'cybersecurity']
    war_keywords = ['war', 'warfare', 'conflict', 'battle', 'military', 'army', 'defense', 'weapon', 'strategy', 'combat', 'invasion', 'occupation', 'siege']
    economics_keywords = ['economy', 'economic', 'finance', 'financial', 'market', 'trade', 'business', 'industry', 'company', 'corporation', 'gdp', 'inflation', 'recession']
    science_keywords = ['science', 'scientific', 'research', 'study', 'experiment', 'discovery', 'innovation', 'physics', 'chemistry', 'biology', 'medicine', 'health']
    query_lower = query.lower()
    if any(keyword in query_lower for keyword in politics_keywords):
        return 'politics'
    elif any(keyword in query_lower for keyword in history_keywords):
        return 'history'
    elif any(keyword in query_lower for keyword in geography_keywords):
        return 'geography'
    elif any(keyword in query_lower for keyword in current_affairs_keywords):
        return 'current_affairs'
    elif any(keyword in query_lower for keyword in technology_keywords):
        return 'technology'
    elif any(keyword in query_lower for keyword in war_keywords):
        return 'war'
    elif any(keyword in query_lower for keyword in economics_keywords):
        return 'economics'
    elif any(keyword in query_lower for keyword in science_keywords):
        return 'science'
    else:
        return 'general'
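
# Note: the checks above are ordered and substring-based, so the first matching
# category wins and short keywords can match inside longer words (e.g. 'ai' also
# matches "chain"). Hand-traced examples, not from a test suite:
#   detect_topic_category("medieval siege tactics")  -> 'history'   ('medieval' is checked before 'siege')
#   detect_topic_category("GDP growth forecast")     -> 'economics' ('gdp')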
def get_specialized_domains(topic_type: str) -> List[str]:
    """Get specialized domains based on topic category"""
    domain_mapping = {
        'politics': ['reuters.com', 'bbc.com', 'cnn.com', 'politico.com', 'foreignaffairs.com', 'cfr.org', 'brookings.edu', 'csis.org'],
        'history': ['britannica.com', 'history.com', 'nationalgeographic.com', 'smithsonianmag.com', 'historynet.com', 'worldhistory.org'],
        'geography': ['nationalgeographic.com', 'worldatlas.com', 'britannica.com', 'cia.gov', 'worldbank.org', 'un.org'],
        'current_affairs': ['reuters.com', 'bbc.com', 'cnn.com', 'ap.org', 'npr.org', 'aljazeera.com', 'theguardian.com', 'nytimes.com'],
        'technology': ['techcrunch.com', 'wired.com', 'arstechnica.com', 'ieee.org', 'nature.com', 'sciencemag.org', 'mit.edu', 'stanford.edu'],
        'war': ['janes.com', 'defensenews.com', 'militarytimes.com', 'csis.org', 'rand.org', 'stratfor.com'],
        'economics': ['reuters.com', 'bloomberg.com', 'economist.com', 'ft.com', 'worldbank.org', 'imf.org', 'federalreserve.gov'],
        'science': ['nature.com', 'sciencemag.org', 'scientificamerican.com', 'newscientist.com', 'pnas.org', 'cell.com'],
        'general': ['wikipedia.org', 'britannica.com', 'reuters.com', 'bbc.com', 'cnn.com']
    }
    return domain_mapping.get(topic_type, domain_mapping['general'])
def get_topic_keywords(query: str, topic_type: str) -> List[str]:
    """Get enhanced keywords based on topic category"""
    keyword_mapping = {
        'politics': ['analysis', 'policy', 'government', 'official', 'statement', 'report', 'briefing', 'summit', 'debate', 'legislation'],
        'history': ['timeline', 'chronology', 'facts', 'documented', 'archive', 'primary source', 'historian', 'evidence', 'analysis', 'context'],
        'geography': ['facts', 'statistics', 'data', 'demographic', 'topography', 'atlas', 'survey', 'official', 'census', 'coordinates'],
        'current_affairs': ['breaking', 'latest', 'update', 'developing', 'live', 'recent', 'today', 'headlines', 'news', 'report'],
        'technology': ['innovation', 'breakthrough', 'development', 'advancement', 'research', 'cutting-edge', 'emerging', 'trend', 'future', 'application'],
        'war': ['analysis', 'strategy', 'tactics', 'intelligence', 'assessment', 'report', 'conflict', 'situation', 'update', 'briefing'],
        'economics': ['analysis', 'forecast', 'data', 'statistics', 'trend', 'market', 'report', 'outlook', 'indicator', 'growth'],
        'science': ['research', 'study', 'discovery', 'breakthrough', 'publication', 'peer-reviewed', 'journal', 'findings', 'methodology', 'evidence'],
        'general': ['information', 'facts', 'comprehensive', 'detailed', 'overview', 'guide', 'explanation', 'analysis', 'summary', 'background']
    }
    return keyword_mapping.get(topic_type, keyword_mapping['general'])
def get_priority_domains_for_topic(topic_type: str) -> List[str]:
    """Get priority domains for result ranking based on topic"""
    priority_mapping = {
        'politics': ['reuters.com', 'bbc.com', 'cnn.com', 'politico.com', 'foreignaffairs.com', 'cfr.org', 'brookings.edu', 'apnews.com'],
        'history': ['britannica.com', 'history.com', 'nationalgeographic.com', 'smithsonianmag.com', 'worldhistory.org', 'historynet.com'],
        'geography': ['nationalgeographic.com', 'worldatlas.com', 'britannica.com', 'cia.gov', 'worldbank.org', 'un.org'],
        'current_affairs': ['reuters.com', 'bbc.com', 'cnn.com', 'ap.org', 'npr.org', 'aljazeera.com', 'theguardian.com', 'nytimes.com'],
        'technology': ['techcrunch.com', 'wired.com', 'arstechnica.com', 'ieee.org', 'nature.com', 'mit.edu', 'stanford.edu', 'acm.org'],
        'war': ['janes.com', 'defensenews.com', 'csis.org', 'rand.org', 'stratfor.com', 'cfr.org'],
        'economics': ['reuters.com', 'bloomberg.com', 'economist.com', 'ft.com', 'worldbank.org', 'imf.org', 'federalreserve.gov'],
        'science': ['nature.com', 'sciencemag.org', 'scientificamerican.com', 'newscientist.com', 'pnas.org', 'cell.com'],
        'general': ['wikipedia.org', 'britannica.com', 'reuters.com', 'bbc.com', 'cnn.com', 'nationalgeographic.com']
    }
    return priority_mapping.get(topic_type, priority_mapping['general'])
# Sanitize filename for safe file creation
def sanitize_filename(filename: str) -> str:
    """Sanitize filename to remove invalid characters for Windows/Unix systems"""
    # Remove or replace invalid characters
    invalid_chars = '<>:"/\\|?*'
    for char in invalid_chars:
        filename = filename.replace(char, '_')
    # Remove multiple consecutive underscores and trim
    filename = re.sub(r'_+', '_', filename)
    filename = filename.strip('_')
    # Limit length to prevent issues
    if len(filename) > 200:
        filename = filename[:200]
    # Ensure it's not empty and add extension if missing
    if not filename:
        filename = "research_report"
    if not filename.endswith('.md'):
        filename += '.md'
    return filename
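
# Hand-traced example: sanitize_filename("What is AI?") -> "What is AI.md"
# (the '?' becomes '_', underscore runs collapse, edge underscores are stripped,
# then '.md' is appended).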
# PDF Generation Function
def create_pdf_report(content: str, topic: str, sources: List[Dict], filename: str) -> str:
    """Create a professional PDF report from markdown content"""
    try:
        # Create temporary PDF file
        temp_dir = tempfile.gettempdir()
        pdf_path = os.path.join(temp_dir, filename.replace('.md', '.pdf'))
        # Create PDF document
        doc = SimpleDocTemplate(pdf_path, pagesize=A4, topMargin=1*inch, bottomMargin=1*inch)
        styles = getSampleStyleSheet()
        story = []
        # Custom styles
        title_style = ParagraphStyle(
            'CustomTitle',
            parent=styles['Heading1'],
            fontSize=24,
            textColor=colors.HexColor('#2C3E50'),
            spaceAfter=30,
            alignment=TA_CENTER,
            fontName='Helvetica-Bold'
        )
        subtitle_style = ParagraphStyle(
            'CustomSubtitle',
            parent=styles['Heading2'],
            fontSize=14,
            textColor=colors.HexColor('#34495E'),
            spaceAfter=20,
            alignment=TA_CENTER
        )
        header_style = ParagraphStyle(
            'CustomHeader',
            parent=styles['Heading2'],
            fontSize=16,
            textColor=colors.HexColor('#2980B9'),
            spaceAfter=12,
            spaceBefore=20,
            fontName='Helvetica-Bold'
        )
        body_style = ParagraphStyle(
            'CustomBody',
            parent=styles['Normal'],
            fontSize=11,
            textColor=colors.HexColor('#2C3E50'),
            spaceAfter=6,
            alignment=TA_LEFT,
            leading=14
        )
        # Header Section
        story.append(Paragraph(APP_NAME, title_style))
        story.append(Paragraph(APP_DESCRIPTION, subtitle_style))
        story.append(Spacer(1, 0.2*inch))
        # Add decorative line
        line_data = [['']]  # a single cell, so the data matches the one colWidth
        line_table = Table(line_data, colWidths=[5*inch])
        line_table.setStyle(TableStyle([
            ('LINEBELOW', (0, 0), (-1, -1), 2, colors.HexColor('#3498DB')),
        ]))
        story.append(line_table)
        story.append(Spacer(1, 0.3*inch))
        # Research Topic
        story.append(Paragraph("Research Topic", header_style))
        story.append(Paragraph(topic, body_style))
        story.append(Spacer(1, 0.2*inch))
        # Generation Info
        current_time = datetime.now().strftime("%B %d, %Y at %I:%M %p")
        story.append(Paragraph("Generated", header_style))
        story.append(Paragraph(f"{current_time}", body_style))
        story.append(Spacer(1, 0.2*inch))
        # Sources Summary
        if sources:
            story.append(Paragraph("Sources Analyzed", header_style))
            story.append(Paragraph(f"{len(sources)} reliable sources processed", body_style))
            story.append(Spacer(1, 0.3*inch))
        story.append(PageBreak())
        # Main Content
        story.append(Paragraph("Research Report", header_style))
        story.append(Spacer(1, 0.1*inch))
        # Process markdown content line by line
        lines = content.split('\n')
        for line in lines:
            line = line.strip()
            if not line:
                story.append(Spacer(1, 6))
                continue
            if line.startswith('# '):
                story.append(Paragraph(line[2:], header_style))
            elif line.startswith('## '):
                story.append(Paragraph(line[3:], header_style))
            elif line.startswith('### '):
                header_3_style = ParagraphStyle(
                    'Header3',
                    parent=header_style,
                    fontSize=14,
                    textColor=colors.HexColor('#7F8C8D')
                )
                story.append(Paragraph(line[4:], header_3_style))
            elif line.startswith('**') and line.endswith('**'):
                bold_style = ParagraphStyle(
                    'Bold',
                    parent=body_style,
                    fontName='Helvetica-Bold'
                )
                story.append(Paragraph(line[2:-2], bold_style))
            elif line.startswith('- ') or line.startswith('* '):
                bullet_style = ParagraphStyle(
                    'Bullet',
                    parent=body_style,
                    leftIndent=20,
                    bulletIndent=10,
                    bulletColor=colors.HexColor('#3498DB')
                )
                # bulletText is a Paragraph argument, not a style attribute
                story.append(Paragraph(line[2:], bullet_style, bulletText='•'))
            elif re.match(r'^\d+\. ', line):  # numbered list items, any number
                story.append(Paragraph(line, body_style))
            else:
                # Clean basic markdown formatting
                line = re.sub(r'\*\*(.*?)\*\*', r'<b>\1</b>', line)
                line = re.sub(r'\*(.*?)\*', r'<i>\1</i>', line)
                story.append(Paragraph(line, body_style))
        # Footer section
        story.append(PageBreak())
        story.append(Paragraph("Sources", header_style))
        if sources:
            for i, source in enumerate(sources[:10], 1):  # Limit to 10 sources
                source_style = ParagraphStyle(
                    'Source',
                    parent=body_style,
                    fontSize=10,
                    leftIndent=10,
                    spaceAfter=8
                )
                title = source.get('title', 'No Title')[:100]
                url = source.get('url', '')
                story.append(Paragraph(f"{i}. {title}", source_style))
                if url:
                    url_style = ParagraphStyle(
                        'URL',
                        parent=source_style,
                        fontSize=9,
                        textColor=colors.HexColor('#3498DB'),
                        leftIndent=20
                    )
                    story.append(Paragraph(url, url_style))
        # Footer
        story.append(Spacer(1, 0.5*inch))
        footer_style = ParagraphStyle(
            'Footer',
            parent=styles['Normal'],
            fontSize=10,
            textColor=colors.HexColor('#7F8C8D'),
            alignment=TA_CENTER
        )
        story.append(Paragraph(f"Generated by {APP_NAME} {APP_VERSION} | Advanced AI Research Assistant", footer_style))
        # Build PDF
        doc.build(story)
        return pdf_path
    except Exception as e:
        print(f"PDF generation error: {e}")
        return None
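
# Illustrative call (all values are placeholders, not part of the app flow):
# pdf = create_pdf_report("## Findings\nSome **bold** text", "Sample topic",
#                         [{"title": "Example source", "url": "https://example.com"}],
#                         "sample_report.md")
# print(pdf)  # path to the PDF in the system temp directory, or None on failure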
# Validate Gemini API key
def validate_api_key(api_key: str) -> tuple[bool, str]:
    """Validate if the Gemini API key is working"""
    if not api_key or not api_key.strip():
        return False, "❌ API key is empty. Please enter a valid Gemini API key."
    api_key = api_key.strip()
    # Basic format checks
    if len(api_key) < 20:
        return False, "❌ API key seems too short. Please check that you copied the complete key."
    if not api_key.replace('-', '').replace('_', '').isalnum():
        return False, "❌ API key contains invalid characters. Please check your key format."
    try:
        # Test the API key with a minimal, low-cost generation request
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-2.0-flash')
        response = model.generate_content("Test", generation_config={"max_output_tokens": 10})
        return True, "✅ API key is valid and working!"
    except Exception as e:
        error_msg = str(e).lower()
        print(f"API Key validation error: {e}")  # Debug info
if "api key not valid" in error_msg or "api_key_invalid" in error_msg: | |
return False, """❌ Invalid API key. Please check your Gemini API key and try again. | |
**Common issues:** | |
• Make sure you copied the ENTIRE key from https://aistudio.google.com/ | |
• Check for extra spaces at the beginning or end | |
• Try refreshing the page and copying the key again | |
• Make sure you're using the correct API key (not mixing up with other services)""" | |
elif "quota" in error_msg or "limit" in error_msg: | |
return False, """❌ API quota exceeded. Your Gemini API usage limit has been reached. | |
**Solutions:** | |
• Check your usage at https://aistudio.google.com/ | |
• Wait for the quota to reset (usually monthly) | |
• Consider upgrading your plan if needed""" | |
elif "permission" in error_msg or "forbidden" in error_msg: | |
return False, """❌ API key doesn't have required permissions. | |
**Solutions:** | |
• Regenerate your API key at https://aistudio.google.com/ | |
• Make sure the API key is enabled for Gemini API | |
• Check if your Google Cloud project has the necessary permissions""" | |
elif "network" in error_msg or "connection" in error_msg or "timeout" in error_msg: | |
return False, """❌ Network error. Please check your internet connection and try again. | |
**Troubleshooting:** | |
• Check your internet connection | |
• Try again in a few minutes | |
• Disable VPN if you're using one | |
• Check if Google services are accessible in your region""" | |
elif "model" in error_msg: | |
return False, """❌ Model not available. The specified Gemini model might not be available. | |
**Solutions:** | |
• Try using a different model (like 'gemini-pro') | |
• Check Gemini API availability at https://status.cloud.google.com/""" | |
else: | |
return False, f"""❌ API key validation failed: {str(e)} | |
**Debugging tips:** | |
• Make sure you're using a valid Gemini API key from https://aistudio.google.com/ | |
• Try creating a new API key if the current one doesn't work | |
• Check the Google Cloud Console for any billing or permission issues""" | |
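
# Minimal standalone check (assumes the key is exported as GEMINI_API_KEY;
# note this makes one real, if tiny, generation request):
# ok, msg = validate_api_key(os.environ.get("GEMINI_API_KEY", ""))
# print(ok, msg)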
# Search the web for relevant information using DuckDuckGo with enhanced targeting for diverse topics
def web_search(query: str, max_results: int = 15) -> List[Dict[str, str]]:
    """Enhanced search for diverse topics: Politics, History, Technology, Current Affairs, etc."""
    try:
        with DDGS() as ddgs:
            all_results = []
            # Detect topic category for specialized search
            topic_type = detect_topic_category(query.lower())
            print(f"Detected topic category: {topic_type}")
            # Strategy 1: Exact phrase search
            try:
                exact_results = list(ddgs.text(f'"{query}"', max_results=max_results//3))
                all_results.extend(exact_results)
                print(f"Found {len(exact_results)} results from exact search")
            except Exception as e:
                print(f"Exact search error: {e}")
            # Strategy 2: Topic-specific domain searches
            specialized_domains = get_specialized_domains(topic_type)
            for domain in specialized_domains:
                try:
                    domain_results = list(ddgs.text(f'{query} site:{domain}', max_results=2))
                    all_results.extend(domain_results)
                    if len(all_results) >= max_results:
                        break
                except Exception as e:
                    print(f"Domain search error for {domain}: {e}")
                    continue
            # Strategy 3: Enhanced keyword searches based on topic
            enhanced_keywords = get_topic_keywords(query, topic_type)
            for keyword in enhanced_keywords[:5]:
                try:
                    keyword_results = list(ddgs.text(f'{query} {keyword}', max_results=2))
                    all_results.extend(keyword_results)
                    if len(all_results) >= max_results:
                        break
                except Exception as e:
                    print(f"Keyword search error for {keyword}: {e}")
                    continue
            # Strategy 4: Time-based searches for current affairs
            if topic_type in ['current_affairs', 'politics', 'technology', 'news']:
                time_modifiers = ['2024', '2025', 'latest', 'recent', 'current', 'today', 'this year']
                for modifier in time_modifiers[:3]:
                    try:
                        time_results = list(ddgs.text(f'{query} {modifier}', max_results=2))
                        all_results.extend(time_results)
                        if len(all_results) >= max_results:
                            break
                    except Exception as e:
                        print(f"Time-based search error for {modifier}: {e}")
                        continue
            # Strategy 5: Academic and authoritative sources
            academic_modifiers = ['analysis', 'research', 'study', 'report', 'comprehensive', 'detailed']
            for modifier in academic_modifiers[:3]:
                try:
                    academic_results = list(ddgs.text(f'{query} {modifier}', max_results=2))
                    all_results.extend(academic_results)
                    if len(all_results) >= max_results:
                        break
                except Exception as e:
                    print(f"Academic search error for {modifier}: {e}")
                    continue
            # Strategy 6: Fallback comprehensive search
            if len(all_results) < 8:
                try:
                    general_results = list(ddgs.text(query, max_results=max_results//2))
                    all_results.extend(general_results)
                except Exception as e:
                    print(f"General search error: {e}")
            # Remove duplicates and prioritize authoritative domains
            seen_urls = set()
            unique_results = []
            priority_domains = get_priority_domains_for_topic(topic_type)
            # First, add results from priority domains
            for result in all_results:
                url = result.get('href', '')
                if url not in seen_urls and any(domain in url for domain in priority_domains):
                    seen_urls.add(url)
                    unique_results.append(result)
                    if len(unique_results) >= max_results:
                        break
            # Then add other unique results
            for result in all_results:
                url = result.get('href', '')
                if url not in seen_urls:
                    seen_urls.add(url)
                    unique_results.append(result)
                    if len(unique_results) >= max_results:
                        break
            print(f"Total unique results found: {len(unique_results)}")
            return unique_results[:max_results]
    except Exception as e:
        print(f"Search error: {e}")
        # Final fallback - simple search
        try:
            with DDGS() as ddgs:
                results = list(ddgs.text(query, max_results=min(max_results, 5)))
                print(f"Fallback search found: {len(results)} results")
                return results
        except Exception as e2:
            print(f"Fallback search error: {e2}")
            return []
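
# Quick manual check (requires network access; DuckDuckGo results vary over time):
# for r in web_search("quantum computing breakthroughs", max_results=5):
#     print(r.get('title'), '->', r.get('href'))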
# Fetch and extract content from a URL with better error handling
def fetch_url_content(url: str) -> str:
    """Fetch content from a URL and extract meaningful text with enhanced error handling"""
    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        }
        # Generous timeout; a shorter retry happens in the Timeout handler below
        response = requests.get(url, headers=headers, timeout=15, allow_redirects=True)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')
        # Remove unwanted elements
        for element in soup(['script', 'style', 'nav', 'footer', 'header', 'aside', 'iframe', 'noscript']):
            element.decompose()
        # Try to get the main content area first
        main_content = soup.find('main') or soup.find('article') or soup.find('div', class_=['content', 'main', 'body'])
        if main_content:
            text = main_content.get_text()
        else:
            text = soup.get_text()
        # Clean up text: split on double spaces so short words like "of" survive the length filter
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = ' '.join(chunk for chunk in chunks if chunk and len(chunk) > 2)
        # Remove excessive whitespace and clean up
        text = re.sub(r'\s+', ' ', text)
        text = text.strip()
        # Return up to 8,000 characters for richer analysis
        return text[:8000] if text else ""
    except requests.exceptions.Timeout:
        print(f"Timeout error for {url} - retrying with a shorter timeout")
        try:
            # Retry once with a shorter timeout and simpler extraction
            response = requests.get(url, headers=headers, timeout=8, allow_redirects=True)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, 'html.parser')
            text = soup.get_text()
            text = re.sub(r'\s+', ' ', text.strip())
            return text[:5000] if text else ""
        except Exception as retry_error:
            print(f"Retry failed for {url}: {retry_error}")
            return ""
    except requests.exceptions.RequestException as e:
        print(f"Request error fetching {url}: {e}")
        return ""
    except Exception as e:
        print(f"Unexpected error fetching {url}: {e}")
        return ""
# Research function using web search and content extraction with enhanced analysis for diverse topics
def perform_research(query: str, max_sources: int = 12) -> Dict[str, Any]:
    """Perform comprehensive research by searching and extracting content from multiple sources"""
    print(f"🔍 Starting comprehensive research for: {query}")
    # Detect topic category for better research strategy
    topic_type = detect_topic_category(query.lower())
    print(f"📊 Detected topic category: {topic_type}")
    # Over-fetch search results so at least `max_sources` quality sources survive filtering
    search_results = web_search(query, max_results=max_sources*4)
    print(f"📊 Found {len(search_results)} potential sources")
    sources = []
    content_chunks = []
    successful_fetches = 0
    failed_fetches = 0
    for result in search_results:
        if successful_fetches >= max_sources:
            break
        url = result.get('href', '')
        title = result.get('title', 'No title')
        # Skip low-quality or duplicate sources
        if should_skip_source(url, title, sources):
            print(f"⏭️ Skipping {url} - low quality or duplicate")
            continue
        print(f"🌐 Fetching content from {url}")
        content = fetch_url_content(url)
        if content and len(content) > 150:  # Minimum content threshold
            # Validate content quality for the specific topic
            if is_relevant_content(content, query, topic_type):
                sources.append({
                    'title': title,
                    'url': url,
                    'content': content,
                    'topic_type': topic_type
                })
                content_chunks.append(f"SOURCE {successful_fetches + 1} [{topic_type.upper()}]:\nTITLE: {title}\nURL: {url}\nCONTENT:\n{content}\n{'='*100}\n")
                successful_fetches += 1
                print(f"✅ Successfully extracted {len(content)} characters from source {successful_fetches}")
            else:
                print(f"⚠️ Content not relevant for {query}")
                failed_fetches += 1
        else:
            print(f"⚠️ Skipped {url} - insufficient content ({len(content) if content else 0} chars)")
            failed_fetches += 1
        # Add a small delay to be respectful to the sites
        time.sleep(0.3)
    # If we don't have enough sources, try a broader search
    if successful_fetches < 8:
        print(f"🔄 Only found {successful_fetches} quality sources, trying broader search...")
        broader_results = web_search(f"{query} comprehensive analysis", max_results=15)
        for result in broader_results:
            if successful_fetches >= max_sources:
                break
            url = result.get('href', '')
            title = result.get('title', 'No title')
            if should_skip_source(url, title, sources):
                continue
            content = fetch_url_content(url)
            if content and len(content) > 100:
                sources.append({
                    'title': title,
                    'url': url,
                    'content': content,
                    'topic_type': 'additional'
                })
                content_chunks.append(f"ADDITIONAL SOURCE {successful_fetches + 1}:\nTITLE: {title}\nURL: {url}\nCONTENT:\n{content}\n{'='*100}\n")
                successful_fetches += 1
                print(f"✅ Additional source {successful_fetches} added")
            time.sleep(0.3)
    research_context = "\n".join(content_chunks)
    print(f"📝 Research completed: {successful_fetches} sources processed, {failed_fetches} failed")
    print(f"📊 Total content length: {len(research_context)} characters")
    return {
        'sources': sources,
        'research_context': research_context,
        'query': query,
        'total_sources': successful_fetches,
        'topic_type': topic_type,
        'failed_sources': failed_fetches
    }
def should_skip_source(url: str, title: str, existing_sources: List[Dict]) -> bool:
    """Check if a source should be skipped based on quality and duplication"""
    # Skip if the URL already exists
    existing_urls = [source['url'] for source in existing_sources]
    if url in existing_urls:
        return True
    # Skip low-quality domains
    low_quality_domains = ['pinterest.com', 'instagram.com', 'facebook.com', 'twitter.com', 'tiktok.com', 'reddit.com']
    if any(domain in url for domain in low_quality_domains):
        return True
    # Skip if the title is too short or generic
    if len(title) < 10 or title.lower() in ['no title', 'untitled', 'page not found']:
        return True
    return False
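
# Hand-traced examples of the rules above:
#   should_skip_source("https://www.reddit.com/r/tech/x", "A long discussion", [])  -> True  (blocked domain)
#   should_skip_source("https://reuters.com/article/x", "Short", [])                -> True  (title under 10 chars)
#   should_skip_source("https://reuters.com/article/x", "Global markets rally", []) -> False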
def is_relevant_content(content: str, query: str, topic_type: str) -> bool:
    """Check if content is relevant to the query and topic type"""
    content_lower = content.lower()
    query_words = query.lower().split()
    # Check if at least 30% of the query words appear in the content
    matching_words = sum(1 for word in query_words if word in content_lower)
    word_relevance = matching_words / len(query_words) if query_words else 0
    # Topic-specific relevance keywords
    topic_relevance_keywords = get_topic_keywords(query, topic_type)
    topic_matches = sum(1 for keyword in topic_relevance_keywords if keyword.lower() in content_lower)
    # Content should have reasonable length and relevance
    return len(content) > 200 and (word_relevance >= 0.3 or topic_matches >= 2)
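
# Behaviour summary (per the thresholds above): content under 200 characters is
# always rejected; otherwise it is kept if at least 30% of the query words occur
# in it, or if at least two of the category's keywords (see get_topic_keywords)
# appear. Note the word check is substring-based, so very short query words match easily.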
# Generate a research report using Gemini with enhanced topic handling
def generate_research_report(research_data: Dict[str, Any], gemini_api_key: str) -> str:
    """Generate a comprehensive research report using Gemini for diverse topics"""
    if not gemini_api_key:
        return "❌ Gemini API key is required to generate the report."
    # Validate API key first
    is_valid, validation_message = validate_api_key(gemini_api_key)
    if not is_valid:
        return f"❌ {validation_message}"
    try:
        # Initialize Gemini (already configured in validation)
        model = genai.GenerativeModel('gemini-2.0-flash')
        topic_type = research_data.get('topic_type', 'general')
        failed_sources = research_data.get('failed_sources', 0)
        # Create topic-specific prompt
        prompt = f"""
RESEARCH QUERY: {research_data['query']}
TOPIC CATEGORY: {topic_type.upper()}
TOTAL SOURCES ANALYZED: {research_data.get('total_sources', len(research_data['sources']))}
FAILED SOURCES: {failed_sources}

COMPREHENSIVE RESEARCH DATA FROM MULTIPLE AUTHORITATIVE SOURCES:
{research_data['research_context']}

INSTRUCTIONS FOR {topic_type.upper()} RESEARCH REPORT:
Based on the above research data, create a comprehensive, well-structured report analyzing ALL the information provided. This is a {topic_type} research topic, so focus on relevant aspects for this domain.

Your report structure should include:

1. **EXECUTIVE SUMMARY**
   - Key findings and main points about {research_data['query']}
   - Critical insights and takeaways
   - Brief overview of what the research reveals

2. **DETAILED ANALYSIS**
   - In-depth examination of all collected information
   - Multiple perspectives and viewpoints found in sources
   - Connections between different pieces of information
   - Contradictions or debates if any exist

3. **BACKGROUND & CONTEXT**
   - Historical background (if relevant)
   - Current situation and status
   - Relevant context that helps understand the topic

4. **KEY FINDINGS & INSIGHTS**
   - Most important discoveries from the research
   - Patterns and trends identified
   - Significant facts and statistics
   - Expert opinions and analysis

5. **CURRENT STATUS & DEVELOPMENTS**
   - Latest information and recent developments
   - Current state of affairs
   - Recent changes or updates

6. **DIFFERENT PERSPECTIVES**
   - Various viewpoints found in sources
   - Debates and discussions around the topic
   - Conflicting information (if any)

7. **IMPLICATIONS & SIGNIFICANCE**
   - Why this topic matters
   - Impact and consequences
   - Future implications

8. **DETAILED BREAKDOWN**
   - Specific details from each major source
   - Technical information (if applicable)
   - Statistics and data points
   - Quotes and specific information

9. **CONCLUSIONS**
   - Summary of what was discovered
   - Final thoughts and analysis
   - Gaps in information (if any)

10. **SOURCES & REFERENCES**
    - List all sources with proper attribution
    - Include URLs for verification
    - Note the reliability and type of each source

FORMATTING REQUIREMENTS:
- Use clear Markdown formatting with headers (##), subheaders (###), and bullet points
- Make the content engaging, informative, and well-organized
- Include specific details, examples, and quotes from the sources
- Highlight important information with **bold text**
- Use bullet points for lists and key points
- Organize information logically and coherently
- If information is conflicting, present both sides
- If insufficient information is available for any section, clearly state what could not be determined

CONTENT REQUIREMENTS:
- Base your analysis ONLY on the provided source content
- Do not make assumptions or add information not present in the sources
- Include specific details and examples from multiple sources
- Synthesize information from all sources, don't just summarize each one separately
- Maintain objectivity and present facts as found in sources
- If sources contradict each other, present both perspectives
- Focus on creating a comprehensive understanding of {research_data['query']}

TOPIC-SPECIFIC FOCUS FOR {topic_type.upper()}:
{get_topic_specific_instructions(topic_type)}

Remember: This report should be thorough, well-researched, and provide real value to someone wanting to understand {research_data['query']} comprehensively.
"""
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        error_msg = str(e).lower()
        print(f"Report generation error: {e}")  # Debug info
        if "api key not valid" in error_msg or "api_key_invalid" in error_msg:
            return """❌ Invalid API key during report generation.

**Common issues:**
• Your API key may have expired or been revoked
• Check if you copied the complete key
• Try regenerating your API key at https://aistudio.google.com/"""
        elif "quota" in error_msg or "limit" in error_msg:
            return """❌ API quota exceeded during report generation.

**Solutions:**
• Check your usage at https://aistudio.google.com/
• Wait for the quota to reset (usually monthly)
• Consider upgrading your plan if needed"""
        elif "permission" in error_msg or "forbidden" in error_msg:
            return """❌ API key doesn't have the required permissions for report generation.

**Solutions:**
• Regenerate your API key at https://aistudio.google.com/
• Make sure the API key is enabled for the Gemini API"""
        elif "network" in error_msg or "connection" in error_msg or "timeout" in error_msg:
            return """❌ Network error during report generation.

**Troubleshooting:**
• Check your internet connection
• Try again in a few minutes
• The report generation process may take some time"""
        elif "model" in error_msg:
            return """❌ Model not available for report generation.

**Solutions:**
• Try using a different model
• Check Gemini API availability at https://status.cloud.google.com/"""
        else:
            return f"""❌ Error generating report: {str(e)}

**Debugging tips:**
• Try with a shorter research topic
• Check your internet connection
• Make sure your API key has sufficient quota"""
def get_topic_specific_instructions(topic_type: str) -> str:
    """Get specific instructions based on topic category"""
    instructions = {
        'politics': """
- Focus on political implications, policy details, and governmental aspects
- Include information about key political figures, parties, and institutions
- Analyze policy impacts and political consequences
- Present multiple political perspectives objectively
- Include information about voting patterns, polls, or public opinion if available
""",
        'history': """
- Provide chronological context and a timeline of events
- Include historical significance and long-term impacts
- Mention key historical figures, dates, and places
- Analyze causes and effects of historical events
- Connect historical events to modern implications
""",
        'geography': """
- Include specific geographical data, coordinates, and locations
- Provide demographic, climate, and physical geography information
- Discuss economic geography and natural resources
- Include maps, borders, and territorial information
- Analyze geographical impacts on society and economy
""",
        'current_affairs': """
- Focus on the most recent developments and breaking news
- Include a timeline of recent events
- Analyze immediate impacts and short-term consequences
- Provide context for why this is currently significant
- Include quotes from recent statements or press releases
""",
        'technology': """
- Focus on technical specifications, capabilities, and limitations
- Include information about the development timeline and key innovators
- Analyze technological implications and future potential
- Discuss adoption rates, market impact, and the competitive landscape
- Include technical details and how the technology works
""",
        'war': """
- Provide strategic analysis and military context
- Include information about forces, tactics, and equipment involved
- Analyze geopolitical implications and international responses
- Discuss humanitarian impacts and civilian consequences
- Present a timeline of the conflict's development
""",
        'economics': """
- Include specific economic data, statistics, and indicators
- Analyze market trends, financial impacts, and economic consequences
- Discuss effects on different sectors and stakeholders
- Include information about economic policies and their outcomes
- Provide context about economic significance and implications
""",
        'science': """
- Focus on scientific methodology, research findings, and evidence
- Include information about the research institutions and scientists involved
- Explain scientific concepts and their implications
- Discuss peer review status and scientific consensus
- Analyze potential applications and future research directions
"""
    }
    return instructions.get(topic_type, "Focus on providing comprehensive, factual information with proper context and analysis.")
# Main research function
def run_research(topic: str, gemini_api_key: str):
    """Run the complete research process and prepare downloadable report files"""
    hidden = gr.update(visible=False)
    if not gemini_api_key.strip():
        return "❌ Please enter your Gemini API key.", hidden, hidden
    if not topic.strip():
        return "❌ Please enter a research topic.", hidden, hidden
    # First validate the API key
    is_valid, validation_message = validate_api_key(gemini_api_key)
    if not is_valid:
        return f"❌ {validation_message}", hidden, hidden
    try:
        # Perform research
        print(f"Starting research for: {topic}")
        research_data = perform_research(topic)
        if not research_data['sources']:
            return "❌ No relevant sources found. Please try a different search term.", hidden, hidden
        print(f"Found {len(research_data['sources'])} sources, generating report...")
        # Generate report
        report = generate_research_report(research_data, gemini_api_key)
        # Check if report generation was successful
        if report.startswith("❌"):
            return report, hidden, hidden
        # Create a safe downloadable filename from the TOPIC, not the report content
        # (sanitize_filename guarantees a '.md' suffix, swapped here for '_report.md')
        base_filename = sanitize_filename(topic)[:-3] + '_report.md'
        # Write the Markdown report to a temp file so the DownloadButton can serve it
        md_path = os.path.join(tempfile.gettempdir(), base_filename)
        with open(md_path, 'w', encoding='utf-8') as f:
            f.write(report)
        pdf_path = None
        try:
            # Generate the PDF using the original topic for the filename
            pdf_path = create_pdf_report(report, topic, research_data['sources'], base_filename)
            print(f"PDF generated successfully: {pdf_path}")
        except Exception as pdf_error:
            print(f"PDF generation failed: {pdf_error}")
            # Continue without the PDF if it fails
        print(f"Research completed successfully. MD: {md_path}")
        return (
            report,
            gr.update(value=md_path, visible=True),
            gr.update(value=pdf_path, visible=pdf_path is not None),
        )
    except Exception as e:
        print(f"Research error: {e}")  # Debug info
        return f"❌ An error occurred during research: {str(e)}", hidden, hidden
# Gradio interface with dark theme
def create_interface():
    # Dark theme CSS
    dark_css = """
    /* Dark theme base */
    .gradio-container {
        background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%) !important;
        min-height: 100vh;
        color: white !important;
        font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    }
    /* All blocks and containers */
    .block, .gr-box, .gr-form, .gr-panel {
        background: rgba(255, 255, 255, 0.05) !important;
        border: 1px solid rgba(255, 255, 255, 0.1) !important;
        border-radius: 15px !important;
        backdrop-filter: blur(10px) !important;
        padding: 1.5rem !important;
        margin: 0.5rem !important;
    }
    /* Text colors - ALL WHITE */
    body, p, span, div, label, h1, h2, h3, h4, h5, h6 {
        color: white !important;
    }
    .gr-markdown, .gr-markdown * {
        color: white !important;
        background: transparent !important;
    }
    .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
        color: #64b5f6 !important;
        border-bottom: 1px solid rgba(255, 255, 255, 0.2) !important;
    }
    /* Input fields */
    .gr-textbox, .gr-textbox input, .gr-textbox textarea {
        background: rgba(255, 255, 255, 0.1) !important;
        border: 1px solid rgba(255, 255, 255, 0.3) !important;
        border-radius: 10px !important;
        color: white !important;
        padding: 12px !important;
    }
    .gr-textbox input::placeholder, .gr-textbox textarea::placeholder {
        color: rgba(255, 255, 255, 0.6) !important;
    }
    .gr-textbox input:focus, .gr-textbox textarea:focus {
        border-color: #64b5f6 !important;
        box-shadow: 0 0 10px rgba(100, 181, 246, 0.3) !important;
        background: rgba(255, 255, 255, 0.15) !important;
    }
    /* Buttons */
    .gr-button {
        border-radius: 25px !important;
        padding: 12px 24px !important;
        font-weight: 600 !important;
        text-transform: uppercase !important;
        letter-spacing: 0.5px !important;
        transition: all 0.3s ease !important;
        border: none !important;
        color: white !important;
    }
    .gr-button-primary {
        background: linear-gradient(135deg, #64b5f6, #42a5f5) !important;
        box-shadow: 0 4px 15px rgba(100, 181, 246, 0.4) !important;
    }
    .gr-button-primary:hover {
        background: linear-gradient(135deg, #42a5f5, #2196f3) !important;
        transform: translateY(-2px) !important;
        box-shadow: 0 6px 20px rgba(100, 181, 246, 0.6) !important;
    }
    .gr-button-secondary {
        background: linear-gradient(135deg, #546e7a, #37474f) !important;
        box-shadow: 0 4px 15px rgba(84, 110, 122, 0.4) !important;
    }
    .gr-button-secondary:hover {
        background: linear-gradient(135deg, #37474f, #263238) !important;
        transform: translateY(-2px) !important;
    }
    /* Accordion */
    .gr-accordion {
        background: rgba(255, 255, 255, 0.05) !important;
        border: 1px solid rgba(255, 255, 255, 0.1) !important;
        border-radius: 12px !important;
    }
    .gr-accordion summary {
        color: white !important;
        background: rgba(255, 255, 255, 0.1) !important;
        padding: 1rem !important;
        border-radius: 10px !important;
    }
    /* Feature cards */
    .feature-card {
        background: rgba(100, 181, 246, 0.1) !important;
        border: 1px solid rgba(100, 181, 246, 0.3) !important;
        border-radius: 12px !important;
        padding: 1.5rem !important;
        margin: 1rem 0 !important;
        border-left: 4px solid #64b5f6 !important;
        backdrop-filter: blur(10px) !important;
    }
    .feature-card h3, .feature-card h4 {
        color: #64b5f6 !important;
        margin-bottom: 1rem !important;
    }
    .feature-card ul li {
        color: rgba(255, 255, 255, 0.9) !important;
        margin-bottom: 0.5rem !important;
    }
    /* Status indicators */
    .status-success {
        background: rgba(76, 175, 80, 0.2) !important;
        border: 1px solid #4caf50 !important;
        border-left: 4px solid #4caf50 !important;
        color: #a5d6a7 !important;
    }
    .status-error {
        background: rgba(244, 67, 54, 0.2) !important;
        border: 1px solid #f44336 !important;
        border-left: 4px solid #f44336 !important;
        color: #ef9a9a !important;
    }
    /* Hero section */
    .hero-section {
        background: linear-gradient(135deg, #1565c0, #1976d2, #1e88e5) !important;
        border-radius: 15px !important;
        padding: 2rem !important;
        margin-bottom: 2rem !important;
        color: white !important;
        box-shadow: 0 10px 30px rgba(0, 0, 0, 0.3) !important;
        text-align: center !important;
    }
    /* Download section */
    .download-section {
        background: rgba(100, 181, 246, 0.1) !important;
        border: 1px solid rgba(100, 181, 246, 0.3) !important;
        border-radius: 12px !important;
        padding: 1.5rem !important;
        text-align: center !important;
        color: white !important;
    }
    /* Markdown content area */
    .gr-markdown {
        background: rgba(255, 255, 255, 0.05) !important;
        border: 1px solid rgba(255, 255, 255, 0.1) !important;
        border-radius: 10px !important;
        padding: 1.5rem !important;
        max-height: 500px !important;
        overflow-y: auto !important;
    }
    /* Responsive design */
    @media (max-width: 768px) {
        .gradio-container {
            padding: 0.5rem !important;
        }
        .block {
            margin: 0.25rem !important;
            padding: 1rem !important;
        }
        .hero-section {
            padding: 1rem !important;
        }
        .feature-card {
            padding: 1rem !important;
            margin: 0.5rem 0 !important;
        }
    }
    /* Scrollbar styling */
    ::-webkit-scrollbar {
        width: 8px;
    }
    ::-webkit-scrollbar-track {
        background: rgba(255, 255, 255, 0.1);
        border-radius: 4px;
    }
    ::-webkit-scrollbar-thumb {
        background: rgba(100, 181, 246, 0.6);
        border-radius: 4px;
    }
    ::-webkit-scrollbar-thumb:hover {
        background: rgba(100, 181, 246, 0.8);
    }
    """
    with gr.Blocks(
        title=f"{APP_NAME} | Advanced AI Research Assistant",
        theme=gr.themes.Base(
            primary_hue="blue",
            secondary_hue="gray",
            neutral_hue="slate",
            text_size="md",
            radius_size="lg",
            spacing_size="lg"
        ).set(
            body_background_fill="linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%)",
            block_background_fill="rgba(255, 255, 255, 0.05)",
            block_border_color="rgba(255, 255, 255, 0.1)",
            block_radius="15px",
            button_primary_background_fill="linear-gradient(135deg, #64b5f6, #42a5f5)",
            button_primary_text_color="white",
            input_background_fill="rgba(255, 255, 255, 0.1)",
            input_border_color="rgba(255, 255, 255, 0.3)",
            body_text_color="white",
            block_label_text_color="white"
        ),
        css=dark_css
    ) as demo:
        # Hero Section
        with gr.Row():
            with gr.Column():
                gr.HTML(f"""
                <div class="hero-section">
                    <h1 style="font-size: 3rem; font-weight: bold; margin: 0; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
                        🔬 {APP_NAME}
                    </h1>
                    <h2 style="font-size: 1.5rem; margin: 0.5rem 0; opacity: 0.9;">
                        {APP_DESCRIPTION}
                    </h2>
                    <p style="font-size: 1.1rem; margin: 1rem 0; opacity: 0.8;">
                        Powered by Google Gemini AI & Advanced Web Research
                    </p>
                </div>
                """)
        # Features Overview
        with gr.Row():
            with gr.Column():
                gr.HTML("""
                <div class="feature-card">
                    <h3>🎯 What this tool does:</h3>
                    <ul style="margin: 1rem 0;">
                        <li><strong>🔍 Intelligent Search:</strong> Uses DuckDuckGo to find the most relevant sources</li>
                        <li><strong>📊 Content Analysis:</strong> Extracts and processes content from multiple websites</li>
                        <li><strong>🤖 AI Synthesis:</strong> Uses Google Gemini to create comprehensive reports</li>
                        <li><strong>📄 Professional Output:</strong> Generates both Markdown and PDF reports</li>
                        <li><strong>⚡ Fast & Reliable:</strong> Automated research in minutes, not hours</li>
                    </ul>
                </div>
                """)
        # Simple API Key Section
        with gr.Row():
            with gr.Column():
                gr.HTML("""
                <div class="feature-card">
                    <h3>🔑 API Key Setup</h3>
                    <p>Get your free Gemini API key from <a href="https://aistudio.google.com/" target="_blank" style="color: #64b5f6;">Google AI Studio</a></p>
                </div>
                """)
        with gr.Row():
            with gr.Column(scale=3):
                gemini_key = gr.Textbox(
                    label="🔐 Enter your Gemini API Key",
                    type="password",
                    placeholder="Paste your API key here...",
                    container=True
                )
            with gr.Column(scale=1):
                validate_btn = gr.Button(
                    "🔍 Validate",
                    variant="secondary",
                    size="lg"
                )
        validation_output = gr.HTML(visible=False)
        # Main Research Interface
        gr.HTML("<h2 style='text-align: center; color: white; margin: 2rem 0;'>🔬 Start Your Research</h2>")
        with gr.Row():
            with gr.Column(scale=2):
                research_topic = gr.Textbox(
                    label="🎯 Research Topic",
                    placeholder="Enter your research topic here... (e.g., 'Latest developments in quantum computing', 'Climate change solutions 2024', 'AI trends in healthcare')",
                    lines=3,
                    container=True
                )
                with gr.Row():
                    research_btn = gr.Button(
                        "🚀 Start Deep Research",
                        variant="primary",
                        size="lg",
                        scale=2
                    )
                    with gr.Column(scale=1):
                        gr.HTML("<div style='padding: 1rem;'></div>")
            with gr.Column(scale=1):
                gr.HTML("""
                <div class="feature-card">
                    <h4>💡 Research Tips:</h4>
                    <ul style="font-size: 0.9rem;">
                        <li><strong>Be Specific:</strong> "AI in healthcare 2024" vs "AI"</li>
                        <li><strong>Include Context:</strong> Add a year, location, or specific aspect</li>
                        <li><strong>Ask Questions:</strong> "What is the impact of...?"</li>
                        <li><strong>Current Events:</strong> Include "latest" or "current"</li>
                        <li><strong>Multiple Angles:</strong> "Causes and solutions of..."</li>
                    </ul>
                    <div style="margin-top: 1rem; padding: 0.8rem; background: rgba(76, 175, 80, 0.1); border-radius: 6px; border-left: 3px solid #4caf50;">
                        <strong>📊 Research Power:</strong><br>
                        <small>10+ sources • Topic categorization • Authoritative domains • AI synthesis</small>
                    </div>
                </div>
                """)
        # Progress and Results Section
        with gr.Row():
            with gr.Column():
                progress_html = gr.HTML(visible=False)
                output = gr.Markdown(
                    value="Your comprehensive research report will appear here...",
                    label="📊 Research Report",
                    container=True,
                    height=400
                )
        # Download Section
        with gr.Row():
            with gr.Column():
                download_section = gr.HTML(visible=False)
        with gr.Row():
            with gr.Column():
                download_md_btn = gr.DownloadButton(
                    "📝 Download Markdown",
                    visible=False,
                    variant="secondary",
                    size="lg"
                )
            with gr.Column():
                download_pdf_btn = gr.DownloadButton(
                    "📄 Download PDF Report",
                    visible=False,
                    variant="primary",
                    size="lg"
                )
        # Footer
        gr.HTML(f"""
        <div style="text-align: center; padding: 2rem; color: #7f8c8d; border-top: 1px solid #ecf0f1; margin-top: 3rem;">
            <p>🔬 <strong>{APP_NAME} {APP_VERSION}</strong> | Advanced AI Research Assistant</p>
            <p>Powered by Google Gemini AI • Built with ❤️ for researchers worldwide</p>
        </div>
        """)
        # Event Handlers
        def validate_key_handler(api_key):
            if not api_key:
                return gr.update(
                    visible=True,
                    value='<div class="status-error"><h4>❌ API Key Required</h4><p>Please enter your Gemini API key above.</p></div>'
                )
            is_valid, message = validate_api_key(api_key)
            if is_valid:
                return gr.update(
                    visible=True,
                    value=f'<div class="status-success"><h4>✅ API Key Valid!</h4><p>{message}</p><p>You\'re ready to start researching!</p></div>'
                )
            else:
                return gr.update(
                    visible=True,
                    value=f'<div class="status-error"><h4>❌ API Key Issue</h4><div style="white-space: pre-line;">{message}</div></div>'
                )
        def show_progress(topic):
            progress_msg = f"""
            <div class="feature-card">
                <h4>🔄 Research in Progress...</h4>
                <p>📊 Analyzing: <strong>{topic}</strong></p>
                <p>⏳ This may take 1-2 minutes. Please wait...</p>
            </div>
            """
            return gr.update(visible=True, value=progress_msg)

        def research_handler(topic, api_key):
            hidden = gr.update(visible=False)
            if not api_key.strip():
                return "❌ Please enter and validate your Gemini API key first.", hidden, hidden
            if not topic.strip():
                return "❌ Please enter a research topic.", hidden, hidden
            return run_research(topic, api_key)
        # Wire up events
        validate_btn.click(
            fn=validate_key_handler,
            inputs=[gemini_key],
            outputs=[validation_output]
        )
        # Show the progress card, run the research, then hide the card again
        research_btn.click(
            fn=show_progress,
            inputs=[research_topic],
            outputs=[progress_html]
        ).then(
            fn=research_handler,
            inputs=[research_topic, gemini_key],
            outputs=[output, download_md_btn, download_pdf_btn]
        ).then(
            fn=lambda: gr.update(visible=False),
            inputs=None,
            outputs=[progress_html]
        )
        # No extra click handlers are needed for the download buttons:
        # gr.DownloadButton serves whatever file path is set as its value,
        # which research_handler provides via gr.update.
        return demo
# Main execution
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
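    # On Hugging Face Spaces, launch() needs no arguments. For local runs,
    # demo.launch(share=True) creates a temporary public link (assumption:
    # outbound access to Gradio's share servers is allowed).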