Spaces:
Running
Running
""" | |
Calculator Accuracy Fix - TDD Approach
Identifies and fixes calculator accuracy issues to achieve 100% success rate.
""" | |
import pytest | |
import sys | |
import os | |
import logging | |
from pathlib import Path | |
# Add the deployment-ready directory to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) | |
from agents.fixed_enhanced_unified_agno_agent import FixedGAIAAgent | |
logger = logging.getLogger(__name__) | |
class TestCalculatorFix:
    """Test suite to identify and fix calculator accuracy issues.

    Each test runs against a ``FixedGAIAAgent`` created in ``setup_method``.
    The question-based tests call the agent with a natural-language prompt,
    strip surrounding whitespace and thousands separators from the reply and
    compare it with the expected answer. Tests are skipped when the agent
    reports ``available`` as false.
    """

    # Absolute tolerance for numeric comparison in the complex-math cases,
    # so that a reply such as "12.0" is accepted for an expected "12".
    _NUMERIC_TOLERANCE = 0.01

    def setup_method(self):
        """Set up test fixtures."""
        self.agent = FixedGAIAAgent()

    def _skip_if_agent_unavailable(self):
        """Skip the running test when the agent reports it is unavailable."""
        if not self.agent.available:
            pytest.skip("Agent not available for testing")

    @staticmethod
    def _answers_match(actual, expected, tolerance=None):
        """Return True when *actual* is an acceptable answer for *expected*.

        Args:
            actual: Cleaned answer string produced by the agent.
            expected: Expected answer string.
            tolerance: ``None`` to require an exact string match; otherwise
                the two values are additionally accepted when both parse as
                floats and differ by less than this amount.

        Returns:
            bool: Whether the answer counts as correct.
        """
        if actual == expected:
            return True
        if tolerance is None:
            return False
        try:
            return abs(float(actual) - float(expected)) < tolerance
        except ValueError:
            # Non-numeric text can only pass via the exact match above.
            return False

    @staticmethod
    def _log_failures(failures):
        """Log one error line for every failure record in *failures*."""
        for failure in failures:
            logger.error(f" Failed: {failure['question']} → Expected: {failure['expected']}, Got: {failure['actual']}")

    def _run_cases(self, test_cases, tolerance=None):
        """Ask the agent every question in *test_cases* and collect failures.

        Args:
            test_cases: Iterable of dicts with the keys ``'question'``,
                ``'expected'`` and ``'operation'``.
            tolerance: Forwarded to ``_answers_match``; ``None`` means exact
                string comparison only.

        Returns:
            list[dict]: One record per failed case with the keys
            ``'question'``, ``'expected'``, ``'actual'`` and ``'operation'``.
            When the agent call raised, ``'actual'`` is ``"ERROR: <exc>"``.
        """
        failures = []
        for case in test_cases:
            question = case['question']
            expected = case['expected']
            operation = case['operation']
            try:
                # Clean the result for comparison
                actual = self.agent(question).strip().replace(',', '')
            except Exception as e:
                # Deliberately broad: any error raised while querying the
                # agent is recorded as a failed case so that the remaining
                # cases still run and appear in the report.
                actual = f"ERROR: {e}"
                logger.error(f"❌ {operation} error: {question} → {e}")
            else:
                if self._answers_match(actual, expected, tolerance):
                    logger.info(f"✅ {operation} passed: {question} → {actual}")
                    continue
                logger.error(f"❌ {operation} failed: {question} → Expected: {expected}, Got: {actual}")
            failures.append({
                'question': question,
                'expected': expected,
                'actual': actual,
                'operation': operation,
            })
        return failures

    def test_basic_arithmetic_operations(self):
        """Test basic arithmetic operations that should always work.

        Answers must match the expected string exactly (after cleaning).
        """
        self._skip_if_agent_unavailable()
        test_cases = [
            {
                'question': 'What is 25 * 17?',
                'expected': '425',
                'operation': 'multiplication',
            },
            {
                'question': 'What is 144 / 12?',
                'expected': '12',
                'operation': 'division',
            },
            {
                'question': 'What is 100 + 50?',
                'expected': '150',
                'operation': 'addition',
            },
            {
                'question': 'What is 200 - 75?',
                'expected': '125',
                'operation': 'subtraction',
            },
        ]
        failed_operations = self._run_cases(test_cases)

        # Report results
        total = len(test_cases)
        passed = total - len(failed_operations)
        if failed_operations:
            logger.error(f"❌ Calculator accuracy: {passed}/{total} ({passed / total * 100:.1f}%)")
            self._log_failures(failed_operations)
        else:
            logger.info(f"✅ Calculator accuracy: 100% ({total}/{total})")

        # Assert no failures for 100% accuracy
        assert not failed_operations, f"Calculator failed {len(failed_operations)} out of {total} tests"

    def test_complex_mathematical_operations(self):
        """Test complex mathematical operations.

        Numeric answers are accepted when they are within
        ``_NUMERIC_TOLERANCE`` of the expected value; otherwise an exact
        string match is required. The test fails if any case fails.
        """
        self._skip_if_agent_unavailable()
        test_cases = [
            {
                'question': 'What is 2^8?',
                'expected': '256',
                'operation': 'exponentiation',
            },
            {
                'question': 'What is the square root of 144?',
                'expected': '12',
                'operation': 'square_root',
            },
            {
                'question': 'Calculate the factorial of 5',
                'expected': '120',
                'operation': 'factorial',
            },
        ]
        failed_operations = self._run_cases(test_cases, tolerance=self._NUMERIC_TOLERANCE)

        # Report results
        total = len(test_cases)
        passed = total - len(failed_operations)
        success_rate = passed / total * 100
        logger.info(f"📊 Complex math accuracy: {success_rate:.1f}% ({passed}/{total})")
        self._log_failures(failed_operations)

        # Without this assertion the test could never fail, whatever the
        # agent answered.
        assert not failed_operations, f"Complex math failed {len(failed_operations)} out of {total} tests"

    def test_calculator_tool_direct_access(self):
        """Test direct access to calculator tool to identify issues.

        Currently this only verifies that the agent exposes a tool whose
        class name contains ``Calculator``; the listed expressions are
        logged but not evaluated.
        """
        self._skip_if_agent_unavailable()

        # Find calculator tool
        calculator_tool = next(
            (tool for tool in self.agent.tools if 'Calculator' in tool.__class__.__name__),
            None,
        )
        if calculator_tool is None:
            pytest.fail("Calculator tool not found in agent tools")
        logger.info(f"✅ Calculator tool found: {calculator_tool.__class__.__name__}")

        # Test direct calculator operations
        test_operations = [
            ('25 * 17', 425),
            ('144 / 12', 12),
            ('2 ** 8', 256),
            ('100 + 50', 150),
        ]
        # TODO: evaluate each expression through the calculator tool and
        # assert on the result. How to invoke the tool depends on its
        # interface, which is not visible from this file.
        for expression, expected in test_operations:
            logger.info(f"🧮 Testing calculator: {expression} = {expected}")
if __name__ == "__main__":
    # Run the calculator fix tests. pytest.main() returns the run's exit
    # code; pass it to sys.exit() so that executing this file as a script
    # exits non-zero when any test fails instead of always reporting success.
    sys.exit(pytest.main([__file__, "-v", "-s"]))