""" GAIA Agent - Simplified Working Version Complete AGNO Tools with Basic Multimodal Integration This agent provides comprehensive GAIA evaluation capabilities using: - All AGNO tools (calculator, python, wikipedia, arxiv, firecrawl, exa, file, shell) - Basic multimodal tools (Mistral Vision when available) - Simple, reliable answer formatting - No complex dependencies that cause import failures Advantages: - Single agent for all GAIA tasks (text, math, multimodal) - AGNO's native orchestration handles tool selection - Simple, reliable architecture that works in HuggingFace Space - Consistent error handling and response formatting - No complex import dependencies """ import os import logging from typing import Dict, Any, List, Optional from pathlib import Path from agno.agent import Agent from agno.models.mistral import MistralChat # Import European open-source multimodal tools try: from .mistral_multimodal_agent import OpenSourceMultimodalTools MULTIMODAL_AVAILABLE = True except ImportError: try: from mistral_multimodal_agent import OpenSourceMultimodalTools MULTIMODAL_AVAILABLE = True except ImportError: OpenSourceMultimodalTools = None MULTIMODAL_AVAILABLE = False # Simple answer formatting without complex dependencies class SimpleAnswerFormatter: """Simple answer formatter for GAIA evaluation.""" def format_answer(self, response: str, question: str = None) -> str: """Format response for GAIA evaluation.""" if not response: return "" # Clean the response answer = response.strip() # Remove common prefixes prefixes_to_remove = [ "The answer is:", "Answer:", "Final answer:", "The final answer is:", "Based on my analysis,", "According to my research,", ] for prefix in prefixes_to_remove: if answer.lower().startswith(prefix.lower()): answer = answer[len(prefix):].strip() # Remove markdown formatting answer = answer.replace("**", "").replace("*", "") # Extract final answer if it's in a specific format lines = answer.split('\n') for line in lines: line = line.strip() if line and not line.startswith('#') and not line.startswith('-'): # This looks like a final answer return line return answer # Load environment variables from .env file def load_env_file(): """Load environment variables from .env file if it exists.""" env_file = Path('.env') if env_file.exists(): with open(env_file, 'r') as f: for line in f: line = line.strip() if line and not line.startswith('#') and '=' in line: key, value = line.split('=', 1) os.environ[key.strip()] = value.strip() # Load environment variables at module level load_env_file() logger = logging.getLogger(__name__) class GAIAAgent: """ GAIA Agent with comprehensive AGNO tools and basic multimodal capabilities. This agent combines all AGNO tools with basic multimodal processing, providing a single interface for all GAIA evaluation tasks including: - Text and mathematical reasoning - Basic image analysis using Mistral Vision - Web research and content extraction - Simple, reliable answer formatting """ def __init__(self): """Initialize the unified AGNO agent.""" logger.info("🚀 Initializing Unified AGNO Agent...") # Initialize simple answer formatter self.response_formatter = SimpleAnswerFormatter() # Initialize all AGNO tools self.tools = self._init_all_agno_tools() # Initialize European open-source multimodal tools self.multimodal_tools = self._init_multimodal_tools() if self.multimodal_tools: self.tools.extend(self.multimodal_tools.tools) # Check for required API key self.mistral_api_key = os.getenv("MISTRAL_API_KEY") if not self.mistral_api_key: logger.error("❌ MISTRAL_API_KEY not found - AGNO agent requires this for orchestration") self.agent = None self.available = False return # Create the unified AGNO agent self.agent = self._create_agno_agent() # Set availability flag self.available = self.agent is not None if self.available: logger.info("✅ Unified AGNO Agent initialized successfully") logger.info(f"📊 Available tools: {len(self.tools)}") else: logger.error("❌ Unified AGNO Agent initialization failed") def _init_all_agno_tools(self) -> List[Any]: """Initialize all available AGNO tools.""" tools = [] tool_status = {} # Define all AGNO tools with their requirements tools_config = [ # Core computational tools { 'name': 'calculator', 'module': 'agno.tools.calculator', 'class': 'CalculatorTools', 'required_env': None, 'description': 'Mathematical calculations and operations' }, { 'name': 'python', 'module': 'agno.tools.python', 'class': 'PythonTools', 'required_env': None, 'description': 'Python code execution and analysis' }, # Knowledge and research tools { 'name': 'wikipedia', 'module': 'agno.tools.wikipedia', 'class': 'WikipediaTools', 'required_env': None, 'description': 'Wikipedia knowledge retrieval' }, { 'name': 'arxiv', 'module': 'agno.tools.arxiv', 'class': 'ArxivTools', 'required_env': None, 'description': 'Academic research via ArXiv' }, # Web tools { 'name': 'firecrawl', 'module': 'agno.tools.firecrawl', 'class': 'FirecrawlTools', 'required_env': 'FIRECRAWL_API_KEY', 'description': 'Web content extraction' }, { 'name': 'exa', 'module': 'agno.tools.exa', 'class': 'ExaTools', 'required_env': 'EXA_API_KEY', 'description': 'Advanced web search' }, # System tools { 'name': 'file', 'module': 'agno.tools.file', 'class': 'FileTools', 'required_env': None, 'description': 'File operations and management' }, { 'name': 'shell', 'module': 'agno.tools.shell', 'class': 'ShellTools', 'required_env': None, 'description': 'System shell operations' }, # Optional multimodal tools { 'name': 'youtube', 'module': 'agno.tools.youtube', 'class': 'YouTubeTools', 'required_env': None, 'description': 'YouTube video transcription and analysis', 'optional_deps': ['youtube_transcript_api'] }, ] for tool_config in tools_config: tool_name = tool_config['name'] module_path = tool_config['module'] class_name = tool_config['class'] required_env = tool_config['required_env'] description = tool_config['description'] optional_deps = tool_config.get('optional_deps', []) try: # Check if required environment variable is available if required_env and not os.getenv(required_env): logger.warning(f"⚠️ {required_env} not found, {tool_name} tool unavailable") tool_status[tool_name] = f"Missing {required_env}" continue # Import and instantiate the tool module = __import__(module_path, fromlist=[class_name]) tool_class = getattr(module, class_name) # Initialize tool with appropriate parameters if tool_name == 'exa': tool_instance = tool_class(api_key=os.getenv('EXA_API_KEY')) elif tool_name == 'firecrawl': tool_instance = tool_class(api_key=os.getenv('FIRECRAWL_API_KEY')) else: tool_instance = tool_class() tools.append(tool_instance) tool_status[tool_name] = "✅ Available" logger.info(f"✅ {class_name} initialized: {description}") except ImportError as e: if optional_deps and any(dep in str(e) for dep in optional_deps): logger.warning(f"⚠️ {class_name} not available: missing optional dependency") tool_status[tool_name] = f"Missing optional dependency" else: logger.warning(f"⚠️ {class_name} not available: {e}") tool_status[tool_name] = f"Import error: {str(e)[:50]}" except Exception as e: logger.warning(f"⚠️ {class_name} not available: {e}") tool_status[tool_name] = f"Error: {str(e)[:50]}" # Log tool availability summary logger.info("📊 AGNO Tools Status:") for tool_name, status in tool_status.items(): logger.info(f" {tool_name}: {status}") return tools def _init_multimodal_tools(self) -> Optional[Any]: """Initialize European open-source multimodal tools.""" if not MULTIMODAL_AVAILABLE: logger.warning("⚠️ European open-source multimodal tools not available") return None try: multimodal_tools = OpenSourceMultimodalTools() logger.info("✅ European open-source multimodal tools initialized") logger.info("🇪🇺 Features: Image analysis (BLIP-2/Mistral Vision), Audio transcription (Faster-Whisper), Document analysis") return multimodal_tools except Exception as e: logger.warning(f"⚠️ Failed to initialize multimodal tools: {e}") return None def _create_agno_agent(self) -> Optional[Agent]: """Create the unified AGNO agent with all available tools.""" if not self.tools: logger.warning("⚠️ No AGNO tools available, creating agent without tools") try: # Create Mistral model for the agent model = MistralChat( api_key=self.mistral_api_key, id="mistral-large-latest", # Use latest large model for better function calling temperature=0.1, # Low temperature for factual accuracy max_tokens=2000 ) # Create the unified agent with all available tools agent = Agent( model=model, tools=self.tools, instructions=self._get_agent_instructions(), show_tool_calls=True, # Enable tool call visibility for debugging markdown=True, debug_mode=True # Enable debug mode to see tool usage ) logger.info(f"✅ Unified AGNO Agent created with {len(self.tools)} tools") return agent except Exception as e: logger.error(f"❌ Failed to create AGNO agent: {e}") return None def _get_agent_instructions(self) -> str: """Get comprehensive instructions for the unified AGNO agent.""" return """You are a GAIA evaluation agent with access to comprehensive AGNO tools. CRITICAL GAIA EVALUATION REQUIREMENTS: 1. EXACT ANSWER MATCHING: Your final answer must match the expected answer EXACTLY 2. NO EXPLANATIONS: Provide only the final answer, no reasoning or explanations 3. PRECISE FORMAT: Follow the exact format expected (number, text, etc.) 4. FACTUAL ACCURACY: Use tools to verify all information before answering AVAILABLE TOOLS AND WHEN TO USE THEM: CORE COMPUTATIONAL TOOLS: 1. CALCULATOR TOOLS - Use for: - Mathematical calculations and operations - Unit conversions and numerical computations - Complex mathematical expressions 2. PYTHON TOOLS - Use for: - Code execution and analysis - Data processing and calculations - Algorithm implementation KNOWLEDGE AND RESEARCH TOOLS: 3. WIKIPEDIA TOOLS - Use ONLY when: - Wikipedia is explicitly mentioned in the question - Question specifically asks about Wikipedia content - Question references "according to Wikipedia" or similar 4. ARXIV TOOLS - Use for: - Academic research and scientific papers - Technical and research-oriented questions - Latest scientific developments WEB RESEARCH TOOLS: 5. EXA TOOLS - Use for: - General web search and research - Finding current information and recent developments - Biographical information and general knowledge queries - Any web-based fact-checking and information gathering 6. FIRECRAWL TOOLS - Use for: - Web content extraction from specific URLs provided in the question - Detailed webpage analysis when URL is given - Content scraping when specific URLs need to be processed SYSTEM TOOLS: 7. FILE TOOLS - Use for: - File operations and management - Reading and processing local files - File system operations 8. SHELL TOOLS - Use for: - System operations and commands - Environment queries - System-level information gathering 9. YOUTUBE TOOLS - Use for: - YouTube video transcription - Video content analysis via transcripts - Understanding video content without watching MULTIMODAL TOOLS (European Open-Source): 10. IMAGE ANALYSIS - Use for: - Analyzing images using BLIP-2 or Mistral Vision - Answering questions about image content - Visual reasoning and description 11. AUDIO TRANSCRIPTION - Use for: - Transcribing audio files using Faster-Whisper (European community-driven) - Converting speech to text for analysis - Processing audio content 12. DOCUMENT ANALYSIS - Use for: - Analyzing document content and answering questions - Text-based document processing - Document question-answering using DistilBERT GENERAL STRATEGY: 1. Analyze the question to determine the most appropriate tool(s) 2. Use tools systematically to gather accurate information 3. Synthesize findings into a precise, compliant answer 4. Always prioritize accuracy and factual correctness 5. Use multiple tools if needed for verification ANSWER FORMAT: - Provide ONLY the final answer - No explanations, reasoning, or additional text - Match the expected format exactly (number, text, date, etc.) - Ensure factual accuracy through tool verification""" def __call__(self, question: str) -> str: """Process a question using the unified AGNO agent.""" if not self.available: logger.error("❌ Unified AGNO Agent not available - check MISTRAL_API_KEY") return "Agent not available" try: logger.info(f"🤔 Processing question with Unified AGNO Agent: {question[:100]}...") # Use AGNO agent to process the question with full orchestration response = self.agent.run(question) # Extract the response content if hasattr(response, 'content'): raw_answer = response.content elif isinstance(response, str): raw_answer = response else: raw_answer = str(response) # Format the response for GAIA evaluation formatted_answer = self.response_formatter.format_answer(raw_answer, question) logger.info(f"✅ Question processed successfully") logger.info(f"📝 Raw answer: {raw_answer[:200]}...") logger.info(f"🎯 Formatted answer: {formatted_answer}") return formatted_answer except Exception as e: logger.error(f"❌ Error processing question: {e}") return f"Error: {str(e)}" def get_tool_status(self) -> Dict[str, Any]: """Get the current status of all tools.""" multimodal_status = {} if hasattr(self, 'multimodal_tools') and self.multimodal_tools: multimodal_status = self.multimodal_tools.get_capabilities_status() return { 'available': self.available, 'tools_count': len(self.tools) if self.tools else 0, 'mistral_api_key_present': bool(self.mistral_api_key), 'agent_created': self.agent is not None, 'multimodal_tools_available': MULTIMODAL_AVAILABLE, 'multimodal_status': multimodal_status } # Create global agent instance gaia_agent = GAIAAgent() def process_question(question: str) -> str: """Process a question using the GAIA agent.""" return gaia_agent(question) def get_agent_status() -> Dict[str, Any]: """Get the current status of the GAIA agent.""" return gaia_agent.get_tool_status()