"""
Tests for the integrated GAIA agent.

These tests verify that all components of the integrated agent work correctly
and that the agent correctly handles all types of questions in the GAIA assessment.
"""
import logging
import os
import sys
import unittest
from unittest.mock import MagicMock, patch

# Put the parent of this file's directory on the import path so that the
# ``agent_integrated`` module imported below can be found (presumably it lives
# one level above the tests directory -- confirm against the repo layout).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from agent_integrated import GAIAIntegratedAgent

# Silence log output during the test run: this disables every logging call of
# severity CRITICAL and below, process-wide.
logging.disable(logging.CRITICAL)
class TestIntegratedAgent(unittest.TestCase):
    """Test cases for the integrated GAIA agent.

    Every test gets a fresh ``GAIAIntegratedAgent`` from ``setUp`` and drives
    it through ``process_question``. Collaborators (video, image, audio,
    search, graph) are replaced with ``unittest.mock.patch`` in the tests that
    exercise them.
    """

    # Dotted paths of the collaborators that get patched. They are shared by
    # the per-category tests and by test_evaluation_questions.
    # NOTE(review): these targets live under ``src.gaia...`` while the agent
    # itself is imported from ``agent_integrated``; confirm the agent resolves
    # the same objects, otherwise the patches have no effect on it.
    _VIDEO_TARGET = (
        'src.gaia.agent.components.video_analyzer'
        '.VideoAnalyzer.analyze_video_content'
    )
    _SEARCH_TARGET = (
        'src.gaia.agent.components.search_manager.SearchManager.search'
    )
    _GRAPH_TARGET = 'src.gaia.agent.graph.run_agent_graph'
    _IMAGE_TARGET = (
        'src.gaia.agent.components.image_analyzer.ImageAnalyzer.process_image'
    )
    _AUDIO_TARGET = (
        'src.gaia.agent.components.audio_analyzer.AudioAnalyzer.process_audio'
    )

    def setUp(self):
        """Build the test configuration and a fresh agent for each test."""
        self.test_config = {
            "verbose": True,
            "memory": {
                "use_supabase": False,
                "cache_enabled": True,
            },
        }
        self.agent = GAIAIntegratedAgent(self.test_config)

    def tearDown(self):
        """Reset the agent created in ``setUp``, if there is one."""
        if hasattr(self, 'agent'):
            self.agent.reset()

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    def _ask(self, question, files_exist=False):
        """Ask the agent *question* and return its answer.

        Asserts that the answer is not ``None`` and has non-zero length.

        Args:
            question: The question string passed to ``process_question``.
            files_exist: When true, ``os.path.exists`` is patched to return
                ``True`` while the agent processes the question, so that
                made-up file paths in the question are treated as present.

        Returns:
            The answer returned by the agent.
        """
        if files_exist:
            with patch('os.path.exists', return_value=True):
                answer = self.agent.process_question(question)
        else:
            answer = self.agent.process_question(question)
        self.assertIsNotNone(answer)
        self.assertGreater(len(answer), 0)
        return answer

    def _assert_answers(self, cases, transform=None, files_exist=False):
        """Run each ``(question, expected)`` pair in its own sub-test.

        Args:
            cases: Iterable of ``(question, expected)`` pairs. ``expected`` is
                a substring that must occur in the answer, or ``None`` when a
                non-empty answer is all that is required.
            transform: Optional callable applied to the answer before the
                substring check (for example ``str.upper``).
            files_exist: Forwarded to ``_ask``.
        """
        for question, expected in cases:
            with self.subTest(question=question):
                answer = self._ask(question, files_exist=files_exist)
                if expected is None:
                    continue
                if transform is not None:
                    answer = transform(answer)
                self.assertIn(expected, answer)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_agent_initialization(self):
        """Test that the agent initializes correctly."""
        self.assertTrue(self.agent.state["initialized"])
        self.assertIsNotNone(self.agent.multimodal_processor)
        self.assertIsNotNone(self.agent.search_manager)
        self.assertIsNotNone(self.agent.memory_manager)
        self.assertIsNotNone(self.agent.text_analyzer)
        self.assertIsNotNone(self.agent.tools_registry)

    def test_reversed_text_questions(self):
        """Test that the agent correctly handles reversed text questions."""
        cases = [
            ("What does this reversed text say: ELPPA", "APPLE"),
            ("Decode the following: .ANANAB si sihT", "BANANA"),
        ]
        # Compare case-insensitively by upper-casing the answer.
        self._assert_answers(cases, transform=str.upper)

    def test_unscramble_word_questions(self):
        """Test that the agent correctly handles word unscrambling questions."""
        cases = [
            ("Unscramble this word: ELPPA", "APPLE"),
            ("What is the correct spelling of ANANAB?", "BANANA"),
        ]
        self._assert_answers(cases, transform=str.upper)

    @patch(_VIDEO_TARGET)
    def test_youtube_video_questions(self, mock_analyze):
        """Test that the agent correctly handles YouTube video questions."""
        mock_analyze.return_value = {
            "success": True,
            "content": "The video shows 3 bird species.",
            "metadata": {
                "title": "Bird Video",
                "duration": 120,
            },
        }

        cases = [
            (
                "In this YouTube video "
                "https://www.youtube.com/watch?v=dQw4w9WgXcQ, "
                "how many bird species are shown?",
                "3",
            ),
            (
                "What is the content of this video: "
                "https://youtu.be/dQw4w9WgXcQ",
                "3",
            ),
        ]
        self._assert_answers(cases)

    @patch(_SEARCH_TARGET)
    def test_factual_questions(self, mock_search):
        """Test that the agent correctly handles factual questions."""
        mock_search.return_value = {
            "success": True,
            "answer": "Paris is the capital of France.",
            "sources": ["Wikipedia"],
        }

        cases = [
            ("What is the capital of France?", "Paris"),
            # The mocked search result is about France, so only a non-empty
            # answer is required for this one.
            ("Who wrote the novel '1984'?", None),
        ]
        self._assert_answers(cases)

    @patch(_GRAPH_TARGET)
    def test_complex_questions(self, mock_graph):
        """Test that the agent correctly handles complex questions using LangGraph."""
        mock_graph.return_value = {
            "answer": "The answer requires multiple steps of reasoning.",
            "reasoning": "Step 1...\nStep 2...\nStep 3...",
            "tool_results": [],
        }

        cases = [
            (
                "Compare and contrast renewable and non-renewable "
                "energy sources.",
                None,
            ),
            (
                "What are the implications of quantum computing "
                "for cryptography?",
                None,
            ),
        ]
        self._assert_answers(cases)

    @patch(_IMAGE_TARGET)
    def test_image_questions(self, mock_process):
        """Test that the agent correctly handles image analysis questions."""
        mock_process.return_value = {
            "success": True,
            "description": "The image shows a mountain landscape.",
            "elements": ["mountain", "sky", "trees"],
            "analysis_type": "general",
        }

        cases = [
            ("Analyze this image: /path/to/image.jpg", "mountain"),
            ("What is shown in this picture: /path/to/photo.png?", "mountain"),
        ]
        # The paths are fictitious, so pretend they exist on disk.
        self._assert_answers(cases, transform=str.lower, files_exist=True)

    @patch(_AUDIO_TARGET)
    def test_audio_questions(self, mock_process):
        """Test that the agent correctly handles audio analysis questions."""
        mock_process.return_value = {
            "success": True,
            "transcription": "This is a test audio recording.",
            "audio_type": "speech",
            "speakers": ["Speaker 1"],
        }

        cases = [
            ("Transcribe this audio file: /path/to/recording.mp3", "test audio"),
            ("What is said in this audio: /path/to/audio.wav?", "test audio"),
        ]
        # The paths are fictitious, so pretend they exist on disk.
        self._assert_answers(cases, transform=str.lower, files_exist=True)

    def test_error_handling(self):
        """Test that the agent correctly handles errors."""
        # Patch the class, not the instance built in setUp: a newly
        # constructed agent looks the method up on the class, so a patch
        # applied to ``self.agent`` would never be seen by the constructor
        # call below. (Presumably ``__init__`` calls
        # ``_initialize_components`` and lets its exception propagate.)
        with patch.object(GAIAIntegratedAgent, '_initialize_components',
                          side_effect=RuntimeError("Test error")):
            with self.assertRaises(RuntimeError):
                GAIAIntegratedAgent(self.test_config)

        # A failure inside question processing must come back as an answer
        # containing "Error" rather than as a raised exception.
        with patch.object(self.agent.text_analyzer, 'process_text_question',
                          side_effect=Exception("Test error")):
            answer = self._ask("What does this reversed text say: ELPPA")
            self.assertIn("Error", answer)

    def test_memory_caching(self):
        """Test that the agent correctly caches and retrieves answers."""
        question = "What is 2+2?"
        memory = self.agent.memory_manager

        # Cache miss: the freshly computed answer must be stored exactly once.
        with patch.object(memory, 'get_cached_answer', return_value=None), \
                patch.object(memory, 'cache_question_answer') as mock_cache:
            answer = self.agent.process_question(question)
            self.assertIsNotNone(answer)
            mock_cache.assert_called_once()

        # Cache hit: the cached value must be returned unchanged after a
        # single lookup.
        with patch.object(memory, 'get_cached_answer',
                          return_value="The answer is 4.") as mock_get:
            answer = self.agent.process_question(question)
            self.assertEqual(answer, "The answer is 4.")
            mock_get.assert_called_once()

    def test_evaluation_questions(self):
        """Test the agent against mock evaluation questions."""
        # NOTE(review): the expected "3" and "7" below do not appear in any
        # of the mocked return values in this test, so these checks only pass
        # if the agent produces those values itself -- confirm this is
        # intended.
        evaluation_cases = [
            ("What is the capital of France?", None),
            ("What does this reversed text say: ELPPA", None),
            ("Unscramble this word: ANANAB", None),
            (
                "In this YouTube video "
                "https://www.youtube.com/watch?v=dQw4w9WgXcQ, "
                "what's happening?",
                None,
            ),
            (
                "How many bird species are visible in this YouTube video: "
                "https://youtu.be/dQw4w9WgXcQ?",
                "3",
            ),
            (
                "How many studio albums did Mercedes Sosa release "
                "between 2000 and 2009?",
                "7",
            ),
            ("What's shown in this image: /path/to/image.jpg?", None),
            (
                "What is the content of this audio file: /path/to/audio.mp3?",
                None,
            ),
            (
                "What is the position evaluation of this chess position: "
                "/path/to/chess.png?",
                None,
            ),
            (
                "Compare and contrast renewable and non-renewable "
                "energy sources.",
                None,
            ),
        ]

        with patch(self._VIDEO_TARGET,
                   return_value={"success": True, "content": "Birds flying"}), \
                patch(self._IMAGE_TARGET,
                      return_value={"success": True, "description": "An image"}), \
                patch(self._AUDIO_TARGET,
                      return_value={"success": True,
                                    "transcription": "Audio content"}), \
                patch('os.path.exists', return_value=True), \
                patch(self._SEARCH_TARGET,
                      return_value={"success": True,
                                    "answer": "A factual answer"}), \
                patch(self._GRAPH_TARGET,
                      return_value={"answer": "A complex answer"}):
            self._assert_answers(evaluation_cases)
if __name__ == "__main__":
    # Run the tests in this module when the file is executed directly.
    unittest.main()