"""Gradio front end for the Gemini-powered GAIA benchmark agent."""

import asyncio
import json
import logging
import os
import tempfile
import textwrap
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import pandas as pd

from agent import GeminiGAIAAgent, GAIAQuestion

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Custom CSS for professional styling
CUSTOM_CSS = """
.gradio-container {
    max-width: 1400px !important;
    margin: 0 auto;
}
.main-header {
    text-align: center;
    background: linear-gradient(90deg, #4285f4, #34a853, #fbbc05, #ea4335);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    margin-bottom: 20px;
}
.info-box {
    background-color: #c0c7cf;
    border-left: 4px solid #4285f4;
    padding: 15px;
    margin: 10px 0;
    border-radius: 5px;
}
"""

CAPABILITIES_MARKDOWN = textwrap.dedent(
    """
    ### 🎯 GAIA Benchmark Capabilities

    This agent is designed to excel at:

    - **🧠 Complex Reasoning**: Multi-step logical problem solving
    - **🧮 Mathematical Operations**: Advanced calculations and financial modeling
    - **🔍 Web Search**: Real-time information retrieval using DuckDuckGo
    - **📄 File Analysis**: Processing text, CSV, JSON, and image files
    - **🖼️ Multimodal Understanding**: Analyzing images with Gemini's vision capabilities
    - **📊 Data Processing**: Statistical analysis and pattern recognition
    """
)


class GeminiGAIAApp:
    """
    Gradio application for Gemini-powered GAIA Benchmark Agent
    Hugging Face Agents Course - Unit 4 Final Assignment
    """

    # Single source of truth for the model id handed to GeminiGAIAAgent.
    MODEL_NAME = "gemini-2.0-flash-001"

    # Number of most recent entries rendered by get_conversation_history().
    HISTORY_PREVIEW_COUNT = 5

    # Fallback returned by get_example_question() for unknown level/type.
    _DEFAULT_EXAMPLE = "What is 2 + 2?"

    # Example questions keyed by difficulty level, then by example type.
    _EXAMPLES = {
        1: {
            "math": "What is the square root of 144?",
            "factual": "What is the capital of Japan?",
            "conversion": "Convert 100 degrees Fahrenheit to Celsius",
        },
        2: {
            "financial": "If I invest $1000 at 5% annual compound interest, how much will I have after 3 years?",
            "current": "What is the current population of Tokyo according to the latest data?",
            "analysis": "Calculate the average temperature if the daily temperatures were 72°F, 75°F, 68°F, and 71°F",
        },
        3: {
            "complex": "Based on current economic indicators, what are the main recession risks for 2024?",
            "research": "Compare the GDP growth rates of the top 5 economies in 2023 and identify key trends",
            "multimodal": "Analyze any uploaded data file and provide insights about patterns and trends",
        },
    }

    def __init__(self):
        """Set up empty agent/history state and the static agent metadata."""
        self.agent = None
        # API key the current agent was built with; lets us skip rebuilding
        # the agent when the same key is submitted again.
        self._active_api_key: Optional[str] = None
        self.conversation_history: List[Dict[str, Any]] = []
        self.current_question_id = 0

        # Agent metadata
        self.agent_info = {
            "name": "Gemini GAIA Benchmark Agent",
            "author": "AdilzhanB",
            "course": "Hugging Face Agents Course - Unit 4",
            "model": "Google Gemini 2.0-flash-001",
            "version": "1.0",
            "created": "2025-06-17 15:32:22",
            "capabilities": [
                "Complex multi-step reasoning",
                "Advanced mathematical calculations",
                "Real-time web search",
                "Multimodal file analysis",
                "Natural language understanding",
            ],
        }

        # Huggingface repository link
        self.agent_code_link = "https://huggingface.co/spaces/AdilzhanB/Gemini-GAIA-Agent"

    def _initialize_agent(self, api_key: Optional[str] = None) -> str:
        """Create the Gemini GAIA agent.

        Args:
            api_key: Key forwarded to ``GeminiGAIAAgent``; ``None`` is passed
                through unchanged.

        Returns:
            A status string starting with "✅" on success or "❌" on failure.
            On failure ``self.agent`` is reset to ``None``.
        """
        try:
            self.agent = GeminiGAIAAgent(
                model_name=self.MODEL_NAME,
                api_key=api_key,
                temperature=0.1,
                verbose=False,
            )
        except Exception as e:
            error_msg = f"Failed to initialize agent: {str(e)}"
            # logger.exception keeps the traceback for diagnosis.
            logger.exception(error_msg)
            self.agent = None
            self._active_api_key = None
            return f"❌ {error_msg}"

        self._active_api_key = api_key
        logger.info("Gemini agent initialized successfully")
        return "✅ Agent initialized successfully!"

    def _ensure_agent(self, api_key: Optional[str]) -> Optional[str]:
        """Make sure an agent exists, rebuilding it only when the key changed.

        Returns:
            ``None`` when an agent is ready, otherwise the "❌ ..." status
            message to show to the user.
        """
        key = (api_key or "").strip() or None
        key_changed = key is not None and key != self._active_api_key

        if not self.agent or key_changed:
            init_status = self._initialize_agent(key)
            if "❌" in init_status:
                return init_status

        if not self.agent:
            return "❌ Agent not initialized. Please provide a valid Google API key."
        return None

    @staticmethod
    def _preview(text: Any, limit: int) -> str:
        """Return *text* as a string, cut to *limit* chars with "..." only if cut."""
        text = "" if text is None else str(text)
        return text if len(text) <= limit else f"{text[:limit]}..."

    def solve_question(self, question_text: str, difficulty_level: int,
                       uploaded_file, api_key: Optional[str] = None) -> Tuple[str, str, str, str, str, str]:
        """
        Main function to solve GAIA questions

        Args:
            question_text: The question typed by the user.
            difficulty_level: GAIA level from the slider (1-3).
            uploaded_file: Value of the file component, or ``None``. Either an
                object exposing ``.name`` or a plain path string is accepted.
            api_key: Optional Google API key; the agent is rebuilt only when
                this differs from the key it was created with.

        Returns:
            (reasoning, tools_used, confidence, processing_time, final_answer, status).
            On any failure the first five fields are empty strings and
            ``status`` carries a "❌ ..." message.
        """
        blank = ("", "", "", "", "")
        try:
            agent_error = self._ensure_agent(api_key)
            if agent_error:
                return (*blank, agent_error)

            if not question_text or not question_text.strip():
                return (*blank, "❌ Please enter a question.")

            # Handle file upload
            file_path = None
            file_name = None
            if uploaded_file is not None:
                file_path = getattr(uploaded_file, "name", None) or str(uploaded_file)
                file_name = os.path.basename(file_path)

            # Create GAIA question
            self.current_question_id += 1
            gaia_question = GAIAQuestion(
                question_id=f"user_question_{self.current_question_id}",
                question=question_text,
                level=int(difficulty_level),
                file_path=file_path,
                file_name=file_name,
            )

            # Solve the question
            logger.info("Solving question: %s...", question_text[:50])
            result = self.agent.solve_gaia_question(gaia_question)

            # Store in conversation history (failed attempts are kept too)
            self.conversation_history.append({
                "timestamp": datetime.now().isoformat(),
                "question": question_text,
                "result": result,
            })

            # Extract results
            if result.get("error"):
                return (*blank, f"❌ Error: {result.get('agent_response', 'Unknown error')}")

            # Format reasoning steps
            steps = result.get("reasoning_steps") or []
            reasoning_steps = "\n".join(
                f"{number}. {step}" for number, step in enumerate(steps, 1)
            )
            if not reasoning_steps:
                reasoning_steps = "Gemini processed the question using its internal reasoning."

            # Format tools used
            tools_used = ", ".join(str(tool) for tool in (result.get("tools_used") or []))
            if not tools_used or tools_used == "None":
                tools_used = "Gemini's built-in capabilities"

            # Get other metrics
            confidence = f"{result.get('confidence_score') or 0.0:.2f}"
            processing_time = f"{result.get('processing_time_seconds') or 0:.2f}s"
            final_answer = result.get("agent_response", "No answer generated")

            # Success status
            status = f"✅ Question solved successfully! (Model: {result.get('model_used', 'Gemini')})"

            logger.info(
                "Question solved successfully. Tools: %s, Confidence: %s",
                tools_used,
                confidence,
            )

            return (
                reasoning_steps,
                tools_used,
                confidence,
                processing_time,
                final_answer,
                status,
            )

        except Exception as e:
            # UI boundary: surface the failure as a status message, not a crash.
            error_msg = f"❌ Error solving question: {str(e)}"
            logger.exception(error_msg)
            return (*blank, error_msg)

    def get_conversation_history(self) -> str:
        """Return the most recent history entries formatted as Markdown."""
        if not self.conversation_history:
            return "No questions solved yet. Try asking a GAIA-style question!"

        sections = ["## 📚 Recent Conversation History\n\n"]

        recent = self.conversation_history[-self.HISTORY_PREVIEW_COUNT:]
        for i, entry in enumerate(recent, 1):
            result = entry["result"]
            tools = ", ".join(str(tool) for tool in (result.get("tools_used") or ["None"]))
            confidence = result.get("confidence_score") or 0
            answer = result.get("agent_response", "No answer")

            sections.append(
                f"### Question {i}\n"
                f"**Asked:** {self._preview(entry['question'], 150)}\n"
                f"**Level:** {result.get('level', 'N/A')}\n"
                f"**Tools Used:** {tools}\n"
                f"**Confidence:** {confidence:.2f}\n"
                f"**Answer Preview:** {self._preview(answer, 200)}\n"
                # First 19 chars of the ISO timestamp: YYYY-MM-DDTHH:MM:SS
                f"**Time:** {entry['timestamp'][:19]}\n\n"
                "---\n\n"
            )

        return "".join(sections)

    def clear_history(self) -> str:
        """Drop all stored questions, reset the question counter, return a status."""
        self.conversation_history = []
        self.current_question_id = 0
        return "🗑️ History cleared successfully!"

    def test_agent_capabilities(self, api_key: Optional[str] = None) -> str:
        """Run the agent's tool self-test and return a Markdown report.

        Args:
            api_key: Optional Google API key; the agent is rebuilt only when
                this differs from the key it was created with.

        Returns:
            Markdown text with the test results, or a "❌ ..." message when
            the agent cannot be initialized or the test raises.
        """
        try:
            agent_error = self._ensure_agent(api_key)
            if agent_error:
                return agent_error

            # Test tools
            tool_results = self.agent.test_tools()

            lines = [
                "## 🔧 Agent Capability Test Results\n\n",
                f"**Model:** {self.agent.model_name}\n",
                f"**Status:** {'✅ Initialized' if self.agent.model else '❌ Not initialized'}\n\n",
                "### Tool Test Results\n",
            ]

            for tool_name, result in tool_results.items():
                status_icon = "✅" if "✅" in str(result) else "❌"
                lines.append(f"{status_icon} **{tool_name.title()}**: {result}\n")

            lines.append("\n### Available Capabilities\n")
            lines.extend(
                f"- ✅ {capability}\n" for capability in self.agent_info["capabilities"]
            )

            return "".join(lines)

        except Exception as e:
            logger.exception("Agent capability test failed")
            return f"❌ Error testing agent: {str(e)}"

    def get_example_question(self, level: int, example_type: str) -> Tuple[str, int]:
        """Return ``(question, level)`` for an example button.

        Unknown levels or example types fall back to "What is 2 + 2?".
        """
        question = self._EXAMPLES.get(level, {}).get(example_type, self._DEFAULT_EXAMPLE)
        return question, level

    def _agent_info_markdown(self) -> str:
        """Render the agent metadata shown under the page header."""
        info = self.agent_info
        return textwrap.dedent(
            f"""
            ### 🤖 Agent Information

            - **Name:** {info['name']}
            - **Author:** {info['author']}
            - **Course:** {info['course']}
            - **Model:** {info['model']}
            - **Version:** {info['version']}
            """
        )

    def _about_markdown(self) -> str:
        """Render the "About GAIA" tab; the model name comes from agent_info."""
        model = self.agent_info["model"]
        return textwrap.dedent(
            f"""
            ### 🎯 What is GAIA?

            **GAIA (General AI Assistants)** is a comprehensive benchmark designed to evaluate AI assistants on real-world tasks that require:

            #### 🧠 Core Capabilities Tested
            - **Reasoning**: Complex multi-step problem solving and logical inference
            - **Multimodal Understanding**: Processing text, images, documents, and data files
            - **Web Browsing**: Searching for and utilizing current information
            - **Tool Use**: Effective integration and use of various computational tools

            #### 📊 Difficulty Levels
            - **Level 1**: Basic factual questions and simple reasoning tasks
            - **Level 2**: Multi-step problems requiring tool integration
            - **Level 3**: Complex tasks requiring advanced reasoning and multiple tools

            #### 🚀 This Agent's Approach
            This implementation uses **{model}** for its:
            - Superior multimodal capabilities (text + images)
            - Advanced reasoning and problem-solving
            - Large context window for complex tasks
            - Built-in safety and reliability features

            #### 🔗 Technical Details
            - **Model**: {model}
            - **Framework**: Custom Python implementation
            - **Tools**: Calculator, Web Search, File Analyzer
            - **Interface**: Gradio 4.0+
            - **Author**: {self.agent_info['author']}

            #### 📚 Resources
            - [GAIA Benchmark Paper](https://arxiv.org/abs/2311.12983)
            - [GAIA Dataset](https://huggingface.co/datasets/gaia-benchmark/GAIA)
            - [Google AI Studio](https://makersuite.google.com/)
            - [Course Repository]({self.agent_code_link})
            """
        )

    def _footer_html(self) -> str:
        """Render the page footer from agent_info so it cannot drift from the code."""
        info = self.agent_info
        return textwrap.dedent(
            f"""
            <div style="text-align: center;">
            <p>🎓 Hugging Face Agents Course - Unit 4 Final Assignment</p>
            <p>Gemini GAIA Benchmark Agent | Created with ❤️ by {info['author']}</p>
            <p><a href="{self.agent_code_link}" target="_blank">🔗 View Source Code</a> | 📚 Course Materials | <a href="https://makersuite.google.com/" target="_blank">🤖 Google AI Studio</a></p>
            <p>Powered by {info['model']} • Built with Gradio • Created (UTC): {info['created']}</p>
            </div>
            """
        )

    def create_interface(self):
        """Build the Gradio Blocks UI and wire every control to its handler.

        Returns:
            The constructed ``gr.Blocks`` instance (not yet launched).
        """
        with gr.Blocks(css=CUSTOM_CSS, title="Gemini GAIA Agent", theme=gr.themes.Soft()) as interface:

            # Main Header
            gr.HTML('<h1 class="main-header">🚀 Gemini GAIA Benchmark Agent</h1>')

            # Agent Information
            with gr.Row():
                gr.Markdown(self._agent_info_markdown())

            # API Key Configuration
            with gr.Row():
                with gr.Column():
                    api_key_input = gr.Textbox(
                        label="🔑 Google API Key (Required)",
                        placeholder="Enter your Google AI API key here...",
                        type="password",
                        info="Get your free API key from: https://makersuite.google.com/app/apikey",
                    )
                    test_agent_btn = gr.Button("🧪 Test Agent & Tools", variant="secondary")

            # Main Question Interface
            gr.Markdown("## 💭 Ask Your GAIA Question")

            with gr.Row():
                # Left Panel - Input
                with gr.Column(scale=2):
                    question_input = gr.Textbox(
                        label="📝 Your Question",
                        placeholder="Enter your GAIA-style question here...\n\nExamples:\n- What is the compound interest on $1000 at 5% for 3 years?\n- What is the current population of Tokyo?\n- Analyze the uploaded CSV data and find patterns",
                        lines=4,
                        max_lines=8,
                    )

                    with gr.Row():
                        difficulty_slider = gr.Slider(
                            label="🎯 Difficulty Level",
                            minimum=1,
                            maximum=3,
                            value=2,
                            step=1,
                            info="1=Basic | 2=Intermediate | 3=Advanced",
                        )

                        file_upload = gr.File(
                            label="📎 Upload File (Optional)",
                            file_types=[".txt", ".csv", ".json", ".xlsx", ".png", ".jpg", ".jpeg", ".gif", ".pdf"],
                        )

                    solve_button = gr.Button(
                        "🚀 Solve with Gemini",
                        variant="primary",
                        size="lg",
                        scale=2,
                    )

                # Right Panel - Quick Examples
                with gr.Column(scale=1):
                    gr.Markdown("### 📚 Quick Examples")

                    # Level 1 Examples
                    gr.Markdown("**Level 1 (Basic)**")
                    with gr.Row():
                        math_btn = gr.Button("🧮 Math", size="sm")
                        factual_btn = gr.Button("🌍 Factual", size="sm")
                        convert_btn = gr.Button("🔄 Convert", size="sm")

                    # Level 2 Examples
                    gr.Markdown("**Level 2 (Intermediate)**")
                    with gr.Row():
                        finance_btn = gr.Button("💰 Finance", size="sm")
                        current_btn = gr.Button("📊 Current", size="sm")
                        analysis_btn = gr.Button("📈 Analysis", size="sm")

                    # Level 3 Examples
                    gr.Markdown("**Level 3 (Advanced)**")
                    with gr.Row():
                        complex_btn = gr.Button("🧠 Complex", size="sm")
                        research_btn = gr.Button("🔬 Research", size="sm")
                        multimodal_btn = gr.Button("🖼️ Multimodal", size="sm")

            # Output Section
            gr.Markdown("## 🎯 Agent Response")

            with gr.Row():
                # Main Answer
                with gr.Column(scale=2):
                    final_answer_output = gr.Textbox(
                        label="🤖 Gemini's Answer",
                        lines=8,
                        max_lines=15,
                        show_copy_button=True,
                        info="Complete response with reasoning and solution",
                    )

                # Metrics
                with gr.Column(scale=1):
                    confidence_output = gr.Textbox(
                        label="📊 Confidence Score",
                        max_lines=1,
                        info="Agent's confidence in the answer",
                    )
                    processing_time_output = gr.Textbox(
                        label="⏱️ Processing Time",
                        max_lines=1,
                        info="Time taken to solve",
                    )
                    tools_used_output = gr.Textbox(
                        label="🔧 Tools Used",
                        max_lines=3,
                        info="Which capabilities were utilized",
                    )
                    status_output = gr.Textbox(
                        label="✅ Status",
                        max_lines=2,
                        info="Execution status and model info",
                    )

            # Detailed Reasoning (Expandable)
            with gr.Accordion("🔍 Detailed Reasoning Steps", open=False):
                reasoning_output = gr.Textbox(
                    label="Step-by-Step Reasoning",
                    lines=10,
                    show_copy_button=True,
                    info="Detailed breakdown of the solution process",
                )

            # Additional Features Tabs
            with gr.Tabs():
                # Tool Testing Tab
                with gr.TabItem("🛠️ Agent Capabilities"):
                    tool_test_output = gr.Markdown(
                        "Click 'Test Agent & Tools' above to check all capabilities.",
                        elem_classes=["info-box"],
                    )
                    gr.Markdown(CAPABILITIES_MARKDOWN)

                # History Tab
                with gr.TabItem("📚 Conversation History"):
                    with gr.Row():
                        refresh_history_btn = gr.Button("🔄 Refresh History", variant="secondary")
                        clear_history_btn = gr.Button("🗑️ Clear History", variant="stop")

                    history_output = gr.Markdown(
                        "No questions solved yet. Start by asking a GAIA question!",
                        elem_classes=["info-box"],
                    )

                # Documentation Tab
                with gr.TabItem("📖 About GAIA"):
                    gr.Markdown(self._about_markdown())

            # Wire up all the interactions

            # Main solve function
            solve_button.click(
                self.solve_question,
                inputs=[question_input, difficulty_slider, file_upload, api_key_input],
                outputs=[reasoning_output, tools_used_output, confidence_output,
                         processing_time_output, final_answer_output, status_output],
            )

            # Tool testing
            test_agent_btn.click(
                self.test_agent_capabilities,
                inputs=[api_key_input],
                outputs=[tool_test_output],
            )

            # History management
            refresh_history_btn.click(
                self.get_conversation_history,
                outputs=[history_output],
            )

            clear_history_btn.click(
                self.clear_history,
                outputs=[history_output],
            )

            # Example buttons: each fills the question box and sets the slider.
            def example_loader(level: int, example_type: str):
                # Factory binds level/type per button, avoiding late-binding
                # closure bugs inside the loop below.
                def load_example():
                    return self.get_example_question(level, example_type)
                return load_example

            example_buttons = [
                (math_btn, 1, "math"),
                (factual_btn, 1, "factual"),
                (convert_btn, 1, "conversion"),
                (finance_btn, 2, "financial"),
                (current_btn, 2, "current"),
                (analysis_btn, 2, "analysis"),
                (complex_btn, 3, "complex"),
                (research_btn, 3, "research"),
                (multimodal_btn, 3, "multimodal"),
            ]
            for button, level, example_type in example_buttons:
                button.click(
                    example_loader(level, example_type),
                    outputs=[question_input, difficulty_slider],
                )

            # Footer
            gr.HTML(self._footer_html())

        return interface


def main():
    """Main function to launch the Gemini GAIA application"""

    # Configure logging. The root logger is already configured at import time,
    # so force=True is required for this format to take effect.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        force=True,
    )

    logger.info("🚀 Starting Gemini GAIA Benchmark Agent Application...")

    # Create the application
    app = GeminiGAIAApp()
    interface = app.create_interface()

    # Launch configuration for Hugging Face Spaces
    launch_kwargs = {
        "share": True,  # Create public shareable link
        "server_name": "0.0.0.0",  # Allow external connections
        "server_port": 7860,  # Default Gradio port
        "show_error": True,  # Show errors in UI
        "quiet": False,  # Show startup logs
        "favicon_path": None,  # Custom favicon
        "auth": None,  # No authentication required
    }

    logger.info("🌐 Launching Gradio interface...")
    logger.info("🔗 The app will be available at http://localhost:7860")

    try:
        interface.launch(**launch_kwargs)
    except Exception as e:
        logger.exception(f"❌ Failed to launch application: {str(e)}")
        print("Please check your environment setup and try again.")


if __name__ == "__main__":
    main()