# gaia-enhanced-agent/tests/test_calculator_fix.py
# GAIA Agent Deployment
# Deploy Complete Enhanced GAIA Agent with Phase 1-6 Improvements
# 9a6a4dc
"""
Calculator Accuracy Fix - TDD Approach
Identifies and fixes calculator accuracy issues to achieve 100% success rate.
"""
import pytest
import sys
import os
import logging
from pathlib import Path
# Put the parent of this file's directory at the front of sys.path so that the
# `agents` package imported on the next line can be resolved
# (presumably that parent is the deployment-ready project root -- confirm).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent
# Module-level logger named after this module; the tests below report their
# per-case pass/fail results through it.
logger = logging.getLogger(__name__)
class TestCalculatorFix:
    """Test suite to identify and fix calculator accuracy issues.

    Each test sends math questions to a ``FixedGAIAAgent`` instance, strips
    whitespace and thousands separators from the answer, and compares the
    result with the expected value. Outcomes are reported through the
    module-level ``logger``.
    """

    # Absolute tolerance applied when numeric answers are compared as floats.
    _NUMERIC_TOLERANCE = 0.01

    @pytest.fixture(autouse=True)
    def setup_method(self):
        """Set up test fixtures: create a fresh agent for every test."""
        self.agent = FixedGAIAAgent()

    @staticmethod
    def _answers_match(actual, expected, numeric_tolerance=None):
        """Return True when *actual* is an acceptable answer for *expected*.

        Args:
            actual: Cleaned answer text produced by the agent.
            expected: Expected answer text.
            numeric_tolerance: When not ``None``, both values are first
                compared as floats and accepted if they differ by less than
                this amount. Non-numeric text falls back to exact comparison.
        """
        if numeric_tolerance is not None:
            try:
                if abs(float(actual) - float(expected)) < numeric_tolerance:
                    return True
            except ValueError:
                # At least one side is not a number; use the exact comparison.
                pass
        return actual == expected

    def _evaluate_cases(self, test_cases, numeric_tolerance=None):
        """Ask the agent every question and collect the cases that failed.

        Args:
            test_cases: Iterable of dicts with ``question``, ``expected`` and
                ``operation`` keys.
            numeric_tolerance: Forwarded to ``_answers_match``.

        Returns:
            A list of dicts (``question``, ``expected``, ``actual``,
            ``operation``), one per failed or errored case.
        """
        failures = []
        for case in test_cases:
            # Checked per case, as before, so the test is skipped as soon as
            # the agent reports itself unavailable.
            if not self.agent.available:
                pytest.skip("Agent not available for testing")

            question = case['question']
            expected = case['expected']
            operation = case['operation']

            try:
                # Clean the result for comparison.
                cleaned_result = self.agent(question).strip().replace(',', '')
            except Exception as e:
                # Deliberately broad: any agent failure counts as a failed
                # case instead of aborting the remaining cases.
                failures.append({
                    'question': question,
                    'expected': expected,
                    'actual': f"ERROR: {e}",
                    'operation': operation,
                })
                logger.error(f"❌ {operation} error: {question} → {e}")
                continue

            if self._answers_match(cleaned_result, expected, numeric_tolerance):
                logger.info(f"✅ {operation} passed: {question} → {cleaned_result}")
            else:
                failures.append({
                    'question': question,
                    'expected': expected,
                    'actual': cleaned_result,
                    'operation': operation,
                })
                logger.error(
                    f"❌ {operation} failed: {question} → "
                    f"Expected: {expected}, Got: {cleaned_result}"
                )
        return failures

    @staticmethod
    def _log_failures(failures):
        """Log one error line per failure record."""
        for failure in failures:
            logger.error(
                f" Failed: {failure['question']} → "
                f"Expected: {failure['expected']}, Got: {failure['actual']}"
            )

    def test_basic_arithmetic_operations(self):
        """Test basic arithmetic operations that should always work."""
        test_cases = [
            {
                'question': 'What is 25 * 17?',
                'expected': '425',
                'operation': 'multiplication',
            },
            {
                'question': 'What is 144 / 12?',
                'expected': '12',
                'operation': 'division',
            },
            {
                'question': 'What is 100 + 50?',
                'expected': '150',
                'operation': 'addition',
            },
            {
                'question': 'What is 200 - 75?',
                'expected': '125',
                'operation': 'subtraction',
            },
        ]

        # Basic arithmetic must match exactly, so no numeric tolerance is used.
        failed_operations = self._evaluate_cases(test_cases)

        # Report results.
        total = len(test_cases)
        passed = total - len(failed_operations)
        if failed_operations:
            logger.error(
                f"❌ Calculator accuracy: {passed}/{total} "
                f"({passed / total * 100:.1f}%)"
            )
            self._log_failures(failed_operations)
        else:
            logger.info(f"✅ Calculator accuracy: 100% ({total}/{total})")

        # Assert no failures for 100% accuracy.
        assert not failed_operations, (
            f"Calculator failed {len(failed_operations)} out of {total} tests"
        )

    def test_complex_mathematical_operations(self):
        """Test complex mathematical operations.

        NOTE(review): this test only reports the success rate; it has no
        assertion, so it never fails on wrong answers. Confirm whether that
        is intentional before adding one.
        """
        test_cases = [
            {
                'question': 'What is 2^8?',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'What is the square root of 144?',
                'expected': '12',
                'operation': 'square_root',
            },
            {
                'question': 'Calculate the factorial of 5',
                'expected': '120',
                'operation': 'factorial',
            },
        ]

        # For complex operations, allow for slight numeric variations
        # (e.g. "12.0" is accepted for "12").
        failed_operations = self._evaluate_cases(
            test_cases, numeric_tolerance=self._NUMERIC_TOLERANCE
        )

        # Report results.
        total = len(test_cases)
        passed = total - len(failed_operations)
        success_rate = passed / total * 100
        logger.info(
            f"📊 Complex math accuracy: {success_rate:.1f}% ({passed}/{total})"
        )
        self._log_failures(failed_operations)

    def test_calculator_tool_direct_access(self):
        """Test direct access to calculator tool to identify issues.

        Fails when no tool whose class name contains ``Calculator`` is
        registered on the agent.
        """
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

        # Find the first tool whose class name mentions "Calculator".
        calculator_tool = next(
            (
                tool
                for tool in self.agent.tools
                if 'Calculator' in tool.__class__.__name__
            ),
            None,
        )
        if calculator_tool is None:
            pytest.fail("Calculator tool not found in agent tools")
        logger.info(f"✅ Calculator tool found: {calculator_tool.__class__.__name__}")

        # Expressions intended for direct calculator checks.
        test_operations = [
            ('25 * 17', 425),
            ('144 / 12', 12),
            ('2 ** 8', 256),
            ('100 + 50', 150),
        ]
        # TODO(review): the tool is not invoked here because its call
        # interface is not known in this file; the expressions are only
        # logged, so this loop verifies nothing yet.
        for expression, expected in test_operations:
            logger.info(f"🧮 Testing calculator: {expression} = {expected}")
if __name__ == "__main__":
    # Run the calculator fix tests when this file is executed directly
    # (-v: verbose, -s: no output capture) and hand pytest's exit code to the
    # interpreter so failing tests produce a non-zero process status.
    sys.exit(pytest.main([__file__, "-v", "-s"]))